diff --git a/aws/src/integration/java/org/apache/iceberg/aws/s3/TestS3FileIOIntegration.java b/aws/src/integration/java/org/apache/iceberg/aws/s3/TestS3FileIOIntegration.java index 388260a54657..41a07401a1e6 100644 --- a/aws/src/integration/java/org/apache/iceberg/aws/s3/TestS3FileIOIntegration.java +++ b/aws/src/integration/java/org/apache/iceberg/aws/s3/TestS3FileIOIntegration.java @@ -182,6 +182,29 @@ public void testNewInputStreamWithAccessPoint() throws Exception { validateRead(s3FileIO); } + @Test + public void testCrossRegionAccessEnabled() throws Exception { + clientFactory.initialize( + ImmutableMap.of(S3FileIOProperties.CROSS_REGION_ACCESS_ENABLED, "true")); + S3Client s3Client = clientFactory.s3(); + String crossBucketObjectKey = String.format("%s/%s", prefix, UUID.randomUUID()); + String crossBucketObjectUri = + String.format("s3://%s/%s", crossRegionBucketName, crossBucketObjectKey); + try { + s3Client.putObject( + PutObjectRequest.builder() + .bucket(crossRegionBucketName) + .key(crossBucketObjectKey) + .build(), + RequestBody.fromBytes(contentBytes)); + // make a copy in cross-region bucket + S3FileIO s3FileIO = new S3FileIO(clientFactory::s3); + validateRead(s3FileIO, crossBucketObjectUri); + } finally { + AwsIntegTestUtil.cleanS3Bucket(s3Client, crossRegionBucketName, crossBucketObjectKey); + } + } + @Test public void testNewInputStreamWithCrossRegionAccessPoint() throws Exception { clientFactory.initialize(ImmutableMap.of(S3FileIOProperties.USE_ARN_REGION_ENABLED, "true")); @@ -550,7 +573,11 @@ private void write(S3FileIO s3FileIO, String uri) throws Exception { } private void validateRead(S3FileIO s3FileIO) throws Exception { - InputFile file = s3FileIO.newInputFile(objectUri); + validateRead(s3FileIO, objectUri); + } + + private void validateRead(S3FileIO s3FileIO, String s3Uri) throws Exception { + InputFile file = s3FileIO.newInputFile(s3Uri); assertThat(file.getLength()).isEqualTo(contentBytes.length); try (InputStream stream = 
file.newStream()) { String result = IoUtils.toUtf8String(stream); diff --git a/aws/src/main/java/org/apache/iceberg/aws/s3/S3FileIOProperties.java b/aws/src/main/java/org/apache/iceberg/aws/s3/S3FileIOProperties.java index 6813913a4db0..b77400a904aa 100644 --- a/aws/src/main/java/org/apache/iceberg/aws/s3/S3FileIOProperties.java +++ b/aws/src/main/java/org/apache/iceberg/aws/s3/S3FileIOProperties.java @@ -376,6 +376,16 @@ public class S3FileIOProperties implements Serializable { public static final boolean DUALSTACK_ENABLED_DEFAULT = false; + /** + * Determines if S3 client will allow Cross-Region bucket access, default to false. + * + *
<p>
For more details, see + * https://docs.aws.amazon.com/sdk-for-java/latest/developer-guide/s3-cross-region.html + */ + public static final String CROSS_REGION_ACCESS_ENABLED = "s3.cross-region-access-enabled"; + + public static final boolean CROSS_REGION_ACCESS_ENABLED_DEFAULT = false; + /** * Used by {@link S3FileIO}, prefix used for bucket access point configuration. To set, we can * pass a catalog property. @@ -442,6 +452,7 @@ public class S3FileIOProperties implements Serializable { private final Map bucketToAccessPointMapping; private boolean isPreloadClientEnabled; private final boolean isDualStackEnabled; + private final boolean isCrossRegionAccessEnabled; private final boolean isPathStyleAccess; private final boolean isUseArnRegionEnabled; private final boolean isAccelerationEnabled; @@ -477,6 +488,7 @@ public S3FileIOProperties() { this.bucketToAccessPointMapping = Collections.emptyMap(); this.isPreloadClientEnabled = PRELOAD_CLIENT_ENABLED_DEFAULT; this.isDualStackEnabled = DUALSTACK_ENABLED_DEFAULT; + this.isCrossRegionAccessEnabled = CROSS_REGION_ACCESS_ENABLED_DEFAULT; this.isPathStyleAccess = PATH_STYLE_ACCESS_DEFAULT; this.isUseArnRegionEnabled = USE_ARN_REGION_ENABLED_DEFAULT; this.isAccelerationEnabled = ACCELERATION_ENABLED_DEFAULT; @@ -521,6 +533,9 @@ public S3FileIOProperties(Map properties) { properties, ACCELERATION_ENABLED, ACCELERATION_ENABLED_DEFAULT); this.isDualStackEnabled = PropertyUtil.propertyAsBoolean(properties, DUALSTACK_ENABLED, DUALSTACK_ENABLED_DEFAULT); + this.isCrossRegionAccessEnabled = + PropertyUtil.propertyAsBoolean( + properties, CROSS_REGION_ACCESS_ENABLED, CROSS_REGION_ACCESS_ENABLED_DEFAULT); try { this.multiPartSize = PropertyUtil.propertyAsInt(properties, MULTIPART_SIZE, MULTIPART_SIZE_DEFAULT); @@ -680,6 +695,10 @@ public boolean isDualStackEnabled() { return this.isDualStackEnabled; } + public boolean isCrossRegionAccessEnabled() { + return this.isCrossRegionAccessEnabled; + } + public boolean isPathStyleAccess() { 
return this.isPathStyleAccess; } @@ -832,7 +851,7 @@ public void applyCredentialConfigurations( /** * Configure services settings for an S3 client. The settings include: s3DualStack, - * s3UseArnRegion, s3PathStyleAccess, and s3Acceleration + * crossRegionAccessEnabled, s3UseArnRegion, s3PathStyleAccess, and s3Acceleration * *
<p>
Sample usage: * @@ -843,6 +862,7 @@ public void applyCredentialConfigurations( public void applyServiceConfigurations(T builder) { builder .dualstackEnabled(isDualStackEnabled) + .crossRegionAccessEnabled(isCrossRegionAccessEnabled) .serviceConfiguration( S3Configuration.builder() .pathStyleAccessEnabled(isPathStyleAccess) diff --git a/aws/src/test/java/org/apache/iceberg/aws/s3/TestS3FileIOProperties.java b/aws/src/test/java/org/apache/iceberg/aws/s3/TestS3FileIOProperties.java index a61b9efb9fec..71b931257cf5 100644 --- a/aws/src/test/java/org/apache/iceberg/aws/s3/TestS3FileIOProperties.java +++ b/aws/src/test/java/org/apache/iceberg/aws/s3/TestS3FileIOProperties.java @@ -72,6 +72,9 @@ public void testS3FileIOPropertiesDefaultValues() { assertThat(S3FileIOProperties.DUALSTACK_ENABLED_DEFAULT) .isEqualTo(s3FileIOProperties.isDualStackEnabled()); + assertThat(S3FileIOProperties.CROSS_REGION_ACCESS_ENABLED_DEFAULT) + .isEqualTo(s3FileIOProperties.isCrossRegionAccessEnabled()); + assertThat(S3FileIOProperties.PATH_STYLE_ACCESS_DEFAULT) .isEqualTo(s3FileIOProperties.isPathStyleAccess()); @@ -155,6 +158,11 @@ public void testS3FileIOProperties() { S3FileIOProperties.DUALSTACK_ENABLED, String.valueOf(s3FileIOProperties.isDualStackEnabled())); + assertThat(map) + .containsEntry( + S3FileIOProperties.CROSS_REGION_ACCESS_ENABLED, + String.valueOf(s3FileIOProperties.isCrossRegionAccessEnabled())); + assertThat(map) .containsEntry( S3FileIOProperties.PATH_STYLE_ACCESS, @@ -382,6 +390,7 @@ private Map getTestProperties() { map.put(S3FileIOProperties.USE_ARN_REGION_ENABLED, "true"); map.put(S3FileIOProperties.ACCELERATION_ENABLED, "true"); map.put(S3FileIOProperties.DUALSTACK_ENABLED, "true"); + map.put(S3FileIOProperties.CROSS_REGION_ACCESS_ENABLED, "true"); map.put( S3FileIOProperties.MULTIPART_SIZE, String.valueOf(S3FileIOProperties.MULTIPART_SIZE_DEFAULT)); @@ -427,6 +436,7 @@ public void testApplyCredentialConfigurations() { public void testApplyS3ServiceConfigurations() 
{ Map properties = Maps.newHashMap(); properties.put(S3FileIOProperties.DUALSTACK_ENABLED, "true"); + properties.put(S3FileIOProperties.CROSS_REGION_ACCESS_ENABLED, "true"); properties.put(S3FileIOProperties.PATH_STYLE_ACCESS, "true"); properties.put(S3FileIOProperties.USE_ARN_REGION_ENABLED, "true"); // acceleration enabled has to be set to false if path style is true @@ -438,6 +448,7 @@ public void testApplyS3ServiceConfigurations() { ArgumentCaptor.forClass(S3Configuration.class); Mockito.doReturn(mockA).when(mockA).dualstackEnabled(Mockito.anyBoolean()); + Mockito.doReturn(mockA).when(mockA).crossRegionAccessEnabled(Mockito.anyBoolean()); Mockito.doReturn(mockA).when(mockA).serviceConfiguration(Mockito.any(S3Configuration.class)); s3FileIOProperties.applyServiceConfigurations(mockA); diff --git a/docs/docs/aws.md b/docs/docs/aws.md index 5a166c0c9193..e408cb5a2ae4 100644 --- a/docs/docs/aws.md +++ b/docs/docs/aws.md @@ -514,6 +514,22 @@ spark-sql --conf spark.sql.catalog.my_catalog=org.apache.iceberg.spark.SparkCata For more details on using S3 Access Grants, please refer to [Managing access with S3 Access Grants](https://docs.aws.amazon.com/AmazonS3/latest/userguide/access-grants.html). +### S3 Cross-Region Access + +S3 Cross-Region bucket access can be turned on by setting the catalog property `s3.cross-region-access-enabled` to `true`. +This is turned off by default to avoid increased latency on the first S3 API call. 
+ +For example, to enable S3 Cross-Region bucket access with Spark 3.3, you can start the Spark SQL shell with: +``` +spark-sql --conf spark.sql.catalog.my_catalog=org.apache.iceberg.spark.SparkCatalog \ + --conf spark.sql.catalog.my_catalog.warehouse=s3://my-bucket2/my/key/prefix \ + --conf spark.sql.catalog.my_catalog.type=glue \ + --conf spark.sql.catalog.my_catalog.io-impl=org.apache.iceberg.aws.s3.S3FileIO \ + --conf spark.sql.catalog.my_catalog.s3.cross-region-access-enabled=true +``` + +For more details, please refer to [Cross-Region access for Amazon S3](https://docs.aws.amazon.com/sdk-for-java/latest/developer-guide/s3-cross-region.html). + ### S3 Acceleration [S3 Acceleration](https://aws.amazon.com/s3/transfer-acceleration/) can be used to speed up transfers to and from Amazon S3 by as much as 50-500% for long-distance transfer of larger objects.