Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,16 @@ public class S3FileIOProperties implements Serializable {

public static final boolean DUALSTACK_ENABLED_DEFAULT = false;

/**
* Determines if S3 client will allow Cross-Region bucket access, default to false.
*
* <p>For more details, see
* https://docs.aws.amazon.com/sdk-for-java/latest/developer-guide/s3-cross-region.html
*/
public static final String CROSS_REGION_ACCESS_ENABLED = "s3.cross-region-access-enabled";

public static final boolean CROSS_REGION_ACCESS_ENABLED_DEFAULT = false;

/**
* Used by {@link S3FileIO}, prefix used for bucket access point configuration. To set, we can
* pass a catalog property.
Expand Down Expand Up @@ -399,6 +409,7 @@ public class S3FileIOProperties implements Serializable {
private final Map<String, String> bucketToAccessPointMapping;
private boolean isPreloadClientEnabled;
private boolean isDualStackEnabled;
private boolean isCrossRegionAccessEnabled;
private boolean isPathStyleAccess;
private boolean isUseArnRegionEnabled;
private boolean isAccelerationEnabled;
Expand Down Expand Up @@ -431,6 +442,7 @@ public S3FileIOProperties() {
this.bucketToAccessPointMapping = Collections.emptyMap();
this.isPreloadClientEnabled = PRELOAD_CLIENT_ENABLED_DEFAULT;
this.isDualStackEnabled = DUALSTACK_ENABLED_DEFAULT;
this.isCrossRegionAccessEnabled = CROSS_REGION_ACCESS_ENABLED_DEFAULT;
this.isPathStyleAccess = PATH_STYLE_ACCESS_DEFAULT;
this.isUseArnRegionEnabled = USE_ARN_REGION_ENABLED_DEFAULT;
this.isAccelerationEnabled = ACCELERATION_ENABLED_DEFAULT;
Expand Down Expand Up @@ -472,6 +484,9 @@ public S3FileIOProperties(Map<String, String> properties) {
properties, ACCELERATION_ENABLED, ACCELERATION_ENABLED_DEFAULT);
this.isDualStackEnabled =
PropertyUtil.propertyAsBoolean(properties, DUALSTACK_ENABLED, DUALSTACK_ENABLED_DEFAULT);
this.isCrossRegionAccessEnabled =
PropertyUtil.propertyAsBoolean(
properties, CROSS_REGION_ACCESS_ENABLED, CROSS_REGION_ACCESS_ENABLED_DEFAULT);
try {
this.multiPartSize =
PropertyUtil.propertyAsInt(properties, MULTIPART_SIZE, MULTIPART_SIZE_DEFAULT);
Expand Down Expand Up @@ -625,6 +640,10 @@ public boolean isDualStackEnabled() {
return this.isDualStackEnabled;
}

public boolean isCrossRegionAccessEnabled() {
return this.isCrossRegionAccessEnabled;
}

public boolean isPathStyleAccess() {
return this.isPathStyleAccess;
}
Expand Down Expand Up @@ -749,7 +768,7 @@ public <T extends S3ClientBuilder> void applyCredentialConfigurations(

/**
* Configure services settings for an S3 client. The settings include: s3DualStack,
* s3UseArnRegion, s3PathStyleAccess, and s3Acceleration
* crossRegionAccessEnabled, s3UseArnRegion, s3PathStyleAccess, and s3Acceleration
*
* <p>Sample usage:
*
Expand All @@ -760,6 +779,7 @@ public <T extends S3ClientBuilder> void applyCredentialConfigurations(
public <T extends S3ClientBuilder> void applyServiceConfigurations(T builder) {
builder
.dualstackEnabled(isDualStackEnabled)
.crossRegionAccessEnabled(isCrossRegionAccessEnabled)
.serviceConfiguration(
S3Configuration.builder()
.pathStyleAccessEnabled(isPathStyleAccess)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,9 @@ public void testS3FileIOPropertiesDefaultValues() {
Assertions.assertThat(S3FileIOProperties.DUALSTACK_ENABLED_DEFAULT)
.isEqualTo(s3FileIOProperties.isDualStackEnabled());

Assertions.assertThat(S3FileIOProperties.CROSS_REGION_ACCESS_ENABLED_DEFAULT)
.isEqualTo(s3FileIOProperties.isCrossRegionAccessEnabled());

Assertions.assertThat(S3FileIOProperties.PATH_STYLE_ACCESS_DEFAULT)
.isEqualTo(s3FileIOProperties.isPathStyleAccess());

Expand Down Expand Up @@ -155,6 +158,11 @@ public void testS3FileIOProperties() {
S3FileIOProperties.DUALSTACK_ENABLED,
String.valueOf(s3FileIOProperties.isDualStackEnabled()));

Assertions.assertThat(map)
.containsEntry(
S3FileIOProperties.CROSS_REGION_ACCESS_ENABLED,
String.valueOf(s3FileIOProperties.isCrossRegionAccessEnabled()));

Assertions.assertThat(map)
.containsEntry(
S3FileIOProperties.PATH_STYLE_ACCESS,
Expand Down Expand Up @@ -383,6 +391,7 @@ private Map<String, String> getTestProperties() {
map.put(S3FileIOProperties.USE_ARN_REGION_ENABLED, "true");
map.put(S3FileIOProperties.ACCELERATION_ENABLED, "true");
map.put(S3FileIOProperties.DUALSTACK_ENABLED, "true");
map.put(S3FileIOProperties.CROSS_REGION_ACCESS_ENABLED, "true");
map.put(
S3FileIOProperties.MULTIPART_SIZE,
String.valueOf(S3FileIOProperties.MULTIPART_SIZE_DEFAULT));
Expand Down Expand Up @@ -428,6 +437,7 @@ public void testApplyCredentialConfigurations() {
public void testApplyS3ServiceConfigurations() {
Map<String, String> properties = Maps.newHashMap();
properties.put(S3FileIOProperties.DUALSTACK_ENABLED, "true");
properties.put(S3FileIOProperties.CROSS_REGION_ACCESS_ENABLED, "true");
properties.put(S3FileIOProperties.PATH_STYLE_ACCESS, "true");
properties.put(S3FileIOProperties.USE_ARN_REGION_ENABLED, "true");
// acceleration enabled has to be set to false if path style is true
Expand All @@ -439,6 +449,7 @@ public void testApplyS3ServiceConfigurations() {
ArgumentCaptor.forClass(S3Configuration.class);

Mockito.doReturn(mockA).when(mockA).dualstackEnabled(Mockito.anyBoolean());
Mockito.doReturn(mockA).when(mockA).crossRegionAccessEnabled(Mockito.anyBoolean());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please also add Assertions.assertThat(s3Configuration.crossRegionAccessEnabled()).isTrue() at the bottom of this test method

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@nastra
crossRegionAccessEnabled is an S3Client attribute rather than S3Configuration (same as dualstackEnabled).
Similar to dualstackEnabled attribute (not asserted in this test method), s3Configuration.crossRegionAccessEnabled() does not exist.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah unfortunately I think getting the value of the attributes from the client isn't possible without doing some reflection hacks.

What I think we can do is add S3 integration tests though to verify the actual cross region access behavior. That seems like a more useful test to verify that when the property is set we can perform operations across regions as expected. cc @jackye1995 @geruh @rahil-c

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1 for integ test, there is already a AWS_CROSS_REGION config there that can be used

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's also a AWS_TEST_CROSS_REGION_BUCKET config (although originally added for S3 Access Point tests).
Can anybody help / provide guidance on this feature's integration tests? Thanks.

Mockito.doReturn(mockA).when(mockA).serviceConfiguration(Mockito.any(S3Configuration.class));

s3FileIOProperties.applyServiceConfigurations(mockA);
Expand Down
16 changes: 16 additions & 0 deletions docs/docs/aws.md
Original file line number Diff line number Diff line change
Expand Up @@ -500,6 +500,22 @@ spark-sql --conf spark.sql.catalog.my_catalog=org.apache.iceberg.spark.SparkCata

For more details on using S3 Access Grants, please refer to [Managing access with S3 Access Grants](https://docs.aws.amazon.com/AmazonS3/latest/userguide/access-grants.html).

### S3 Cross-Region Access

S3 Cross-Region bucket access can be turned on by setting catalog property `s3.cross-region-access-enabled` to `true`.
This is turned off by default to avoid first S3 API call increased latency.

For example, to enable S3 Cross-Region bucket access with Spark 3.3, you can start the Spark SQL shell with:
```
spark-sql --conf spark.sql.catalog.my_catalog=org.apache.iceberg.spark.SparkCatalog \
--conf spark.sql.catalog.my_catalog.warehouse=s3://my-bucket2/my/key/prefix \
--conf spark.sql.catalog.my_catalog.type=glue \
--conf spark.sql.catalog.my_catalog.io-impl=org.apache.iceberg.aws.s3.S3FileIO \
--conf spark.sql.catalog.my_catalog.s3.cross-region-access-enabled=true
```

For more details, please refer to [Cross-Region access for Amazon S3](https://docs.aws.amazon.com/sdk-for-java/latest/developer-guide/s3-cross-region.html).

### S3 Acceleration

[S3 Acceleration](https://aws.amazon.com/s3/transfer-acceleration/) can be used to speed up transfers to and from Amazon S3 by as much as 50-500% for long-distance transfer of larger objects.
Expand Down