diff --git a/aws/src/integration/java/org/apache/iceberg/aws/s3/TestS3FileIOIntegration.java b/aws/src/integration/java/org/apache/iceberg/aws/s3/TestS3FileIOIntegration.java
index 388260a54657..41a07401a1e6 100644
--- a/aws/src/integration/java/org/apache/iceberg/aws/s3/TestS3FileIOIntegration.java
+++ b/aws/src/integration/java/org/apache/iceberg/aws/s3/TestS3FileIOIntegration.java
@@ -182,6 +182,29 @@ public void testNewInputStreamWithAccessPoint() throws Exception {
validateRead(s3FileIO);
}
+ @Test
+ public void testCrossRegionAccessEnabled() throws Exception {
+ clientFactory.initialize(
+ ImmutableMap.of(S3FileIOProperties.CROSS_REGION_ACCESS_ENABLED, "true"));
+ S3Client s3Client = clientFactory.s3();
+ String crossBucketObjectKey = String.format("%s/%s", prefix, UUID.randomUUID());
+ String crossBucketObjectUri =
+ String.format("s3://%s/%s", crossRegionBucketName, crossBucketObjectKey);
+ try {
+ // make a copy of the test content in the cross-region bucket
+ s3Client.putObject(
+ PutObjectRequest.builder()
+ .bucket(crossRegionBucketName)
+ .key(crossBucketObjectKey)
+ .build(),
+ RequestBody.fromBytes(contentBytes));
+ S3FileIO s3FileIO = new S3FileIO(clientFactory::s3);
+ validateRead(s3FileIO, crossBucketObjectUri);
+ } finally {
+ AwsIntegTestUtil.cleanS3Bucket(s3Client, crossRegionBucketName, crossBucketObjectKey);
+ }
+ }
+
@Test
public void testNewInputStreamWithCrossRegionAccessPoint() throws Exception {
clientFactory.initialize(ImmutableMap.of(S3FileIOProperties.USE_ARN_REGION_ENABLED, "true"));
@@ -550,7 +573,11 @@ private void write(S3FileIO s3FileIO, String uri) throws Exception {
}
private void validateRead(S3FileIO s3FileIO) throws Exception {
- InputFile file = s3FileIO.newInputFile(objectUri);
+ validateRead(s3FileIO, objectUri);
+ }
+
+ private void validateRead(S3FileIO s3FileIO, String s3Uri) throws Exception {
+ InputFile file = s3FileIO.newInputFile(s3Uri);
assertThat(file.getLength()).isEqualTo(contentBytes.length);
try (InputStream stream = file.newStream()) {
String result = IoUtils.toUtf8String(stream);
diff --git a/aws/src/main/java/org/apache/iceberg/aws/s3/S3FileIOProperties.java b/aws/src/main/java/org/apache/iceberg/aws/s3/S3FileIOProperties.java
index 6813913a4db0..b77400a904aa 100644
--- a/aws/src/main/java/org/apache/iceberg/aws/s3/S3FileIOProperties.java
+++ b/aws/src/main/java/org/apache/iceberg/aws/s3/S3FileIOProperties.java
@@ -376,6 +376,16 @@ public class S3FileIOProperties implements Serializable {
public static final boolean DUALSTACK_ENABLED_DEFAULT = false;
+ /**
+ * Determines if S3 client will allow Cross-Region bucket access, default to false.
+ *
+ * <p>For more details, see
+ * https://docs.aws.amazon.com/sdk-for-java/latest/developer-guide/s3-cross-region.html
+ */
+ public static final String CROSS_REGION_ACCESS_ENABLED = "s3.cross-region-access-enabled";
+
+ public static final boolean CROSS_REGION_ACCESS_ENABLED_DEFAULT = false;
+
/**
* Used by {@link S3FileIO}, prefix used for bucket access point configuration. To set, we can
* pass a catalog property.
@@ -442,6 +452,7 @@ public class S3FileIOProperties implements Serializable {
private final Map bucketToAccessPointMapping;
private boolean isPreloadClientEnabled;
private final boolean isDualStackEnabled;
+ private final boolean isCrossRegionAccessEnabled;
private final boolean isPathStyleAccess;
private final boolean isUseArnRegionEnabled;
private final boolean isAccelerationEnabled;
@@ -477,6 +488,7 @@ public S3FileIOProperties() {
this.bucketToAccessPointMapping = Collections.emptyMap();
this.isPreloadClientEnabled = PRELOAD_CLIENT_ENABLED_DEFAULT;
this.isDualStackEnabled = DUALSTACK_ENABLED_DEFAULT;
+ this.isCrossRegionAccessEnabled = CROSS_REGION_ACCESS_ENABLED_DEFAULT;
this.isPathStyleAccess = PATH_STYLE_ACCESS_DEFAULT;
this.isUseArnRegionEnabled = USE_ARN_REGION_ENABLED_DEFAULT;
this.isAccelerationEnabled = ACCELERATION_ENABLED_DEFAULT;
@@ -521,6 +533,9 @@ public S3FileIOProperties(Map properties) {
properties, ACCELERATION_ENABLED, ACCELERATION_ENABLED_DEFAULT);
this.isDualStackEnabled =
PropertyUtil.propertyAsBoolean(properties, DUALSTACK_ENABLED, DUALSTACK_ENABLED_DEFAULT);
+ this.isCrossRegionAccessEnabled =
+ PropertyUtil.propertyAsBoolean(
+ properties, CROSS_REGION_ACCESS_ENABLED, CROSS_REGION_ACCESS_ENABLED_DEFAULT);
try {
this.multiPartSize =
PropertyUtil.propertyAsInt(properties, MULTIPART_SIZE, MULTIPART_SIZE_DEFAULT);
@@ -680,6 +695,10 @@ public boolean isDualStackEnabled() {
return this.isDualStackEnabled;
}
+ public boolean isCrossRegionAccessEnabled() {
+ return this.isCrossRegionAccessEnabled;
+ }
+
public boolean isPathStyleAccess() {
return this.isPathStyleAccess;
}
@@ -832,7 +851,7 @@ public void applyCredentialConfigurations(
/**
* Configure services settings for an S3 client. The settings include: s3DualStack,
- * s3UseArnRegion, s3PathStyleAccess, and s3Acceleration
+ * crossRegionAccessEnabled, s3UseArnRegion, s3PathStyleAccess, and s3Acceleration
*
* Sample usage:
*
@@ -843,6 +862,7 @@ public void applyCredentialConfigurations(
public void applyServiceConfigurations(T builder) {
builder
.dualstackEnabled(isDualStackEnabled)
+ .crossRegionAccessEnabled(isCrossRegionAccessEnabled)
.serviceConfiguration(
S3Configuration.builder()
.pathStyleAccessEnabled(isPathStyleAccess)
diff --git a/aws/src/test/java/org/apache/iceberg/aws/s3/TestS3FileIOProperties.java b/aws/src/test/java/org/apache/iceberg/aws/s3/TestS3FileIOProperties.java
index a61b9efb9fec..71b931257cf5 100644
--- a/aws/src/test/java/org/apache/iceberg/aws/s3/TestS3FileIOProperties.java
+++ b/aws/src/test/java/org/apache/iceberg/aws/s3/TestS3FileIOProperties.java
@@ -72,6 +72,9 @@ public void testS3FileIOPropertiesDefaultValues() {
assertThat(S3FileIOProperties.DUALSTACK_ENABLED_DEFAULT)
.isEqualTo(s3FileIOProperties.isDualStackEnabled());
+ assertThat(S3FileIOProperties.CROSS_REGION_ACCESS_ENABLED_DEFAULT)
+ .isEqualTo(s3FileIOProperties.isCrossRegionAccessEnabled());
+
assertThat(S3FileIOProperties.PATH_STYLE_ACCESS_DEFAULT)
.isEqualTo(s3FileIOProperties.isPathStyleAccess());
@@ -155,6 +158,11 @@ public void testS3FileIOProperties() {
S3FileIOProperties.DUALSTACK_ENABLED,
String.valueOf(s3FileIOProperties.isDualStackEnabled()));
+ assertThat(map)
+ .containsEntry(
+ S3FileIOProperties.CROSS_REGION_ACCESS_ENABLED,
+ String.valueOf(s3FileIOProperties.isCrossRegionAccessEnabled()));
+
assertThat(map)
.containsEntry(
S3FileIOProperties.PATH_STYLE_ACCESS,
@@ -382,6 +390,7 @@ private Map getTestProperties() {
map.put(S3FileIOProperties.USE_ARN_REGION_ENABLED, "true");
map.put(S3FileIOProperties.ACCELERATION_ENABLED, "true");
map.put(S3FileIOProperties.DUALSTACK_ENABLED, "true");
+ map.put(S3FileIOProperties.CROSS_REGION_ACCESS_ENABLED, "true");
map.put(
S3FileIOProperties.MULTIPART_SIZE,
String.valueOf(S3FileIOProperties.MULTIPART_SIZE_DEFAULT));
@@ -427,6 +436,7 @@ public void testApplyCredentialConfigurations() {
public void testApplyS3ServiceConfigurations() {
Map properties = Maps.newHashMap();
properties.put(S3FileIOProperties.DUALSTACK_ENABLED, "true");
+ properties.put(S3FileIOProperties.CROSS_REGION_ACCESS_ENABLED, "true");
properties.put(S3FileIOProperties.PATH_STYLE_ACCESS, "true");
properties.put(S3FileIOProperties.USE_ARN_REGION_ENABLED, "true");
// acceleration enabled has to be set to false if path style is true
@@ -438,6 +448,7 @@ public void testApplyS3ServiceConfigurations() {
ArgumentCaptor.forClass(S3Configuration.class);
Mockito.doReturn(mockA).when(mockA).dualstackEnabled(Mockito.anyBoolean());
+ Mockito.doReturn(mockA).when(mockA).crossRegionAccessEnabled(Mockito.anyBoolean());
Mockito.doReturn(mockA).when(mockA).serviceConfiguration(Mockito.any(S3Configuration.class));
s3FileIOProperties.applyServiceConfigurations(mockA);
diff --git a/docs/docs/aws.md b/docs/docs/aws.md
index 5a166c0c9193..e408cb5a2ae4 100644
--- a/docs/docs/aws.md
+++ b/docs/docs/aws.md
@@ -514,6 +514,22 @@ spark-sql --conf spark.sql.catalog.my_catalog=org.apache.iceberg.spark.SparkCata
For more details on using S3 Access Grants, please refer to [Managing access with S3 Access Grants](https://docs.aws.amazon.com/AmazonS3/latest/userguide/access-grants.html).
+### S3 Cross-Region Access
+
+S3 Cross-Region bucket access can be turned on by setting the catalog property `s3.cross-region-access-enabled` to `true`.
+This is turned off by default to avoid increased latency on the first S3 API call.
+
+For example, to enable S3 Cross-Region bucket access with Spark 3.3, you can start the Spark SQL shell with:
+```sh
+spark-sql --conf spark.sql.catalog.my_catalog=org.apache.iceberg.spark.SparkCatalog \
+ --conf spark.sql.catalog.my_catalog.warehouse=s3://my-bucket2/my/key/prefix \
+ --conf spark.sql.catalog.my_catalog.type=glue \
+ --conf spark.sql.catalog.my_catalog.io-impl=org.apache.iceberg.aws.s3.S3FileIO \
+ --conf spark.sql.catalog.my_catalog.s3.cross-region-access-enabled=true
+```
+
+For more details, please refer to [Cross-Region access for Amazon S3](https://docs.aws.amazon.com/sdk-for-java/latest/developer-guide/s3-cross-region.html).
+
### S3 Acceleration
[S3 Acceleration](https://aws.amazon.com/s3/transfer-acceleration/) can be used to speed up transfers to and from Amazon S3 by as much as 50-500% for long-distance transfer of larger objects.