diff --git a/aws/src/main/java/org/apache/iceberg/aws/s3/S3FileIOProperties.java b/aws/src/main/java/org/apache/iceberg/aws/s3/S3FileIOProperties.java index a1539dbaa39c..7f266f099d1e 100644 --- a/aws/src/main/java/org/apache/iceberg/aws/s3/S3FileIOProperties.java +++ b/aws/src/main/java/org/apache/iceberg/aws/s3/S3FileIOProperties.java @@ -100,7 +100,7 @@ public class S3FileIOProperties implements Serializable { public static final boolean S3_CRT_ENABLED_DEFAULT = true; - /** This property is used to specify the max concurrency for CRT Async clients. */ + /** This property is used to specify the max concurrency for S3 CRT clients. */ public static final String S3_CRT_MAX_CONCURRENCY = "s3.crt.max-concurrency"; /** @@ -1042,7 +1042,7 @@ public void applyEndpointConfigurations(T build } /** - * Override the endpoint for an S3 CRT async client + * Override the endpoint for an S3 CRT client * *

Sample usage: * diff --git a/aws/src/test/java/org/apache/iceberg/aws/s3/TestS3FileIOProperties.java b/aws/src/test/java/org/apache/iceberg/aws/s3/TestS3FileIOProperties.java index 71b931257cf5..b7e9be67b2ac 100644 --- a/aws/src/test/java/org/apache/iceberg/aws/s3/TestS3FileIOProperties.java +++ b/aws/src/test/java/org/apache/iceberg/aws/s3/TestS3FileIOProperties.java @@ -37,9 +37,11 @@ import org.mockito.Mockito; import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration; import software.amazon.awssdk.core.retry.RetryPolicy; +import software.amazon.awssdk.services.s3.S3AsyncClientBuilder; import software.amazon.awssdk.services.s3.S3Client; import software.amazon.awssdk.services.s3.S3ClientBuilder; import software.amazon.awssdk.services.s3.S3Configuration; +import software.amazon.awssdk.services.s3.S3CrtAsyncClientBuilder; import software.amazon.awssdk.services.s3.model.ObjectCannedACL; import software.amazon.awssdk.services.s3.model.Tag; @@ -51,6 +53,7 @@ public class TestS3FileIOProperties { private static final String S3_DELETE_TAG_KEY = "my_key"; private static final String S3_DELETE_TAG_VALUE = "my_value"; + @SuppressWarnings("MethodLength") @Test public void testS3FileIOPropertiesDefaultValues() { S3FileIOProperties s3FileIOProperties = new S3FileIOProperties(); @@ -122,8 +125,18 @@ public void testS3FileIOPropertiesDefaultValues() { .isEqualTo(s3FileIOProperties.isDeleteEnabled()); assertThat(Collections.emptyMap()).isEqualTo(s3FileIOProperties.bucketToAccessPointMapping()); + + assertThat(S3FileIOProperties.S3_CRT_MAX_CONCURRENCY_DEFAULT) + .isEqualTo(s3FileIOProperties.s3CrtMaxConcurrency()); + + assertThat(S3FileIOProperties.S3_CRT_ENABLED_DEFAULT) + .isEqualTo(s3FileIOProperties.isS3CRTEnabled()); + + assertThat(S3FileIOProperties.S3_ANALYTICS_ACCELERATOR_ENABLED_DEFAULT) + .isEqualTo(s3FileIOProperties.isS3AnalyticsAcceleratorEnabled()); } + @SuppressWarnings("MethodLength") @Test public void testS3FileIOProperties() { Map map = getTestProperties(); @@ -265,6 +278,20 @@ public void testS3FileIOProperties() { String.valueOf(s3FileIOProperties.isRemoteSigningEnabled())); assertThat(map).containsEntry(S3FileIOProperties.WRITE_STORAGE_CLASS, "INTELLIGENT_TIERING"); + + assertThat(map) + .containsEntry( + S3FileIOProperties.S3_CRT_MAX_CONCURRENCY, + String.valueOf(s3FileIOProperties.s3CrtMaxConcurrency())); + + assertThat(map) + .containsEntry( + S3FileIOProperties.S3_CRT_ENABLED, String.valueOf(s3FileIOProperties.isS3CRTEnabled())); + + assertThat(map) + .containsEntry( + S3FileIOProperties.S3_ANALYTICS_ACCELERATOR_ENABLED, + String.valueOf(s3FileIOProperties.isS3AnalyticsAcceleratorEnabled())); } @Test @@ -412,6 +439,9 @@ private Map getTestProperties() { map.put(S3FileIOProperties.PRELOAD_CLIENT_ENABLED, "true"); map.put(S3FileIOProperties.REMOTE_SIGNING_ENABLED, "true"); map.put(S3FileIOProperties.WRITE_STORAGE_CLASS, "INTELLIGENT_TIERING"); + map.put(S3FileIOProperties.S3_CRT_MAX_CONCURRENCY, "200"); + map.put(S3FileIOProperties.S3_CRT_ENABLED, "false"); + map.put(S3FileIOProperties.S3_ANALYTICS_ACCELERATOR_ENABLED, "true"); return map; } @@ -489,9 +519,17 @@ public void testApplyEndpointConfiguration() { properties.put(S3FileIOProperties.ENDPOINT, "endpoint"); S3FileIOProperties s3FileIOProperties = new S3FileIOProperties(properties); S3ClientBuilder mockS3ClientBuilder = Mockito.mock(S3ClientBuilder.class); + S3AsyncClientBuilder mockS3AsyncClientBuilder = Mockito.mock(S3AsyncClientBuilder.class); + S3CrtAsyncClientBuilder mockS3CrtAsyncClientBuilder = + Mockito.mock(S3CrtAsyncClientBuilder.class); s3FileIOProperties.applyEndpointConfigurations(mockS3ClientBuilder); + s3FileIOProperties.applyEndpointConfigurations(mockS3AsyncClientBuilder); + s3FileIOProperties.applyEndpointConfigurations(mockS3CrtAsyncClientBuilder); + Mockito.verify(mockS3ClientBuilder).endpointOverride(Mockito.any(URI.class)); + Mockito.verify(mockS3AsyncClientBuilder).endpointOverride(Mockito.any(URI.class)); + Mockito.verify(mockS3CrtAsyncClientBuilder).endpointOverride(Mockito.any(URI.class)); } @Test @@ -505,6 +543,17 @@ public void testApplyUserAgentConfigurations() { .overrideConfiguration(Mockito.any(ClientOverrideConfiguration.class)); } + @Test + public void testApplyS3CrtConfigurations() { + Map properties = Maps.newHashMap(); + S3FileIOProperties s3FileIOProperties = new S3FileIOProperties(properties); + S3CrtAsyncClientBuilder mockS3CrtAsyncClientBuilder = + Mockito.mock(S3CrtAsyncClientBuilder.class); + s3FileIOProperties.applyS3CrtConfigurations(mockS3CrtAsyncClientBuilder); + + Mockito.verify(mockS3CrtAsyncClientBuilder).maxConcurrency(Mockito.any(Integer.class)); + } + @Test public void testApplyRetryConfiguration() { Map properties = Maps.newHashMap(); diff --git a/docs/docs/aws.md b/docs/docs/aws.md index 5a5e431d89f7..3f322dab95f9 100644 --- a/docs/docs/aws.md +++ b/docs/docs/aws.md @@ -582,12 +582,12 @@ spark-sql --conf spark.sql.catalog.my_catalog=org.apache.iceberg.spark.SparkCata The Analytics Accelerator Library can work with either the [S3 CRT client](https://docs.aws.amazon.com/sdk-for-java/latest/developer-guide/crt-based-s3-client.html) or the [S3AsyncClient](https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/services/s3/S3AsyncClient.html). The library recommends that you use the S3 CRT client due to its enhanced connection pool management and [higher throughput on downloads](https://aws.amazon.com/blogs/developer/introducing-crt-based-s3-client-and-the-s3-transfer-manager-in-the-aws-sdk-for-java-2-x/). -##### Client Configuration +#### Client Configuration | Property | Default | Description | |------------------------|---------|--------------------------------------------------------------| | s3.crt.enabled | `true` | Controls if the S3 Async clients should be created using CRT | -| s3.crt.max-concurrency | `500` | Max concurrency for S3 CRT Async clients | +| s3.crt.max-concurrency | `500` | Max concurrency for S3 CRT clients | Additional library specific configurations are organized into the following sections: