Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,29 @@ public void testNewInputStreamWithAccessPoint() throws Exception {
validateRead(s3FileIO);
}

/**
 * Verifies that an object stored in the cross-region bucket can be read through {@link S3FileIO}
 * when {@code S3FileIOProperties.CROSS_REGION_ACCESS_ENABLED} is set to "true".
 */
@Test
public void testCrossRegionAccessEnabled() throws Exception {
  clientFactory.initialize(
      ImmutableMap.of(S3FileIOProperties.CROSS_REGION_ACCESS_ENABLED, "true"));
  S3Client client = clientFactory.s3();
  String objectKey = String.format("%s/%s", prefix, UUID.randomUUID());
  String objectLocation = String.format("s3://%s/%s", crossRegionBucketName, objectKey);
  try {
    // make a copy in cross-region bucket
    PutObjectRequest uploadRequest =
        PutObjectRequest.builder().bucket(crossRegionBucketName).key(objectKey).build();
    client.putObject(uploadRequest, RequestBody.fromBytes(contentBytes));

    // read the uploaded object back through a FileIO built from the same client factory
    S3FileIO fileIO = new S3FileIO(clientFactory::s3);
    validateRead(fileIO, objectLocation);
  } finally {
    // always remove the test object, even when the upload or the read fails
    AwsIntegTestUtil.cleanS3Bucket(client, crossRegionBucketName, objectKey);
  }
}

@Test
public void testNewInputStreamWithCrossRegionAccessPoint() throws Exception {
clientFactory.initialize(ImmutableMap.of(S3FileIOProperties.USE_ARN_REGION_ENABLED, "true"));
Expand Down Expand Up @@ -550,7 +573,11 @@ private void write(S3FileIO s3FileIO, String uri) throws Exception {
}

/**
 * Validates reading the default test object at {@code objectUri} by delegating to the overload
 * that takes an explicit S3 URI.
 */
private void validateRead(S3FileIO s3FileIO) throws Exception {
  // The URI-taking overload creates the InputFile itself, so no local InputFile is needed here.
  validateRead(s3FileIO, objectUri);
}

private void validateRead(S3FileIO s3FileIO, String s3Uri) throws Exception {
InputFile file = s3FileIO.newInputFile(s3Uri);
assertThat(file.getLength()).isEqualTo(contentBytes.length);
try (InputStream stream = file.newStream()) {
String result = IoUtils.toUtf8String(stream);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,16 @@ public class S3FileIOProperties implements Serializable {

public static final boolean DUALSTACK_ENABLED_DEFAULT = false;

/**
 * Determines whether the S3 client allows cross-region bucket access; defaults to false.
 *
 * <p>For more details, see
 * https://docs.aws.amazon.com/sdk-for-java/latest/developer-guide/s3-cross-region.html
 */
public static final String CROSS_REGION_ACCESS_ENABLED = "s3.cross-region-access-enabled";

/** Default value for {@link #CROSS_REGION_ACCESS_ENABLED}: cross-region access is disabled. */
public static final boolean CROSS_REGION_ACCESS_ENABLED_DEFAULT = false;

/**
* Used by {@link S3FileIO}, prefix used for bucket access point configuration. To set, we can
* pass a catalog property.
Expand Down Expand Up @@ -442,6 +452,7 @@ public class S3FileIOProperties implements Serializable {
private final Map<String, String> bucketToAccessPointMapping;
private boolean isPreloadClientEnabled;
private final boolean isDualStackEnabled;
private final boolean isCrossRegionAccessEnabled;
private final boolean isPathStyleAccess;
private final boolean isUseArnRegionEnabled;
private final boolean isAccelerationEnabled;
Expand Down Expand Up @@ -477,6 +488,7 @@ public S3FileIOProperties() {
this.bucketToAccessPointMapping = Collections.emptyMap();
this.isPreloadClientEnabled = PRELOAD_CLIENT_ENABLED_DEFAULT;
this.isDualStackEnabled = DUALSTACK_ENABLED_DEFAULT;
this.isCrossRegionAccessEnabled = CROSS_REGION_ACCESS_ENABLED_DEFAULT;
this.isPathStyleAccess = PATH_STYLE_ACCESS_DEFAULT;
this.isUseArnRegionEnabled = USE_ARN_REGION_ENABLED_DEFAULT;
this.isAccelerationEnabled = ACCELERATION_ENABLED_DEFAULT;
Expand Down Expand Up @@ -521,6 +533,9 @@ public S3FileIOProperties(Map<String, String> properties) {
properties, ACCELERATION_ENABLED, ACCELERATION_ENABLED_DEFAULT);
this.isDualStackEnabled =
PropertyUtil.propertyAsBoolean(properties, DUALSTACK_ENABLED, DUALSTACK_ENABLED_DEFAULT);
this.isCrossRegionAccessEnabled =
PropertyUtil.propertyAsBoolean(
properties, CROSS_REGION_ACCESS_ENABLED, CROSS_REGION_ACCESS_ENABLED_DEFAULT);
try {
this.multiPartSize =
PropertyUtil.propertyAsInt(properties, MULTIPART_SIZE, MULTIPART_SIZE_DEFAULT);
Expand Down Expand Up @@ -680,6 +695,10 @@ public boolean isDualStackEnabled() {
return this.isDualStackEnabled;
}

/** Returns the configured value of the cross-region bucket access flag. */
public boolean isCrossRegionAccessEnabled() {
  return isCrossRegionAccessEnabled;
}

/** Returns the configured value of the path-style access flag. */
public boolean isPathStyleAccess() {
  return isPathStyleAccess;
}
Expand Down Expand Up @@ -832,7 +851,7 @@ public <T extends S3ClientBuilder> void applyCredentialConfigurations(

/**
* Configure services settings for an S3 client. The settings include: s3DualStack,
* crossRegionAccessEnabled, s3UseArnRegion, s3PathStyleAccess, and s3Acceleration
*
* <p>Sample usage:
*
Expand All @@ -843,6 +862,7 @@ public <T extends S3ClientBuilder> void applyCredentialConfigurations(
public <T extends S3ClientBuilder> void applyServiceConfigurations(T builder) {
builder
.dualstackEnabled(isDualStackEnabled)
.crossRegionAccessEnabled(isCrossRegionAccessEnabled)
.serviceConfiguration(
S3Configuration.builder()
.pathStyleAccessEnabled(isPathStyleAccess)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@ public void testS3FileIOPropertiesDefaultValues() {
assertThat(S3FileIOProperties.DUALSTACK_ENABLED_DEFAULT)
.isEqualTo(s3FileIOProperties.isDualStackEnabled());

assertThat(S3FileIOProperties.CROSS_REGION_ACCESS_ENABLED_DEFAULT)
.isEqualTo(s3FileIOProperties.isCrossRegionAccessEnabled());

assertThat(S3FileIOProperties.PATH_STYLE_ACCESS_DEFAULT)
.isEqualTo(s3FileIOProperties.isPathStyleAccess());

Expand Down Expand Up @@ -155,6 +158,11 @@ public void testS3FileIOProperties() {
S3FileIOProperties.DUALSTACK_ENABLED,
String.valueOf(s3FileIOProperties.isDualStackEnabled()));

assertThat(map)
.containsEntry(
S3FileIOProperties.CROSS_REGION_ACCESS_ENABLED,
String.valueOf(s3FileIOProperties.isCrossRegionAccessEnabled()));

assertThat(map)
.containsEntry(
S3FileIOProperties.PATH_STYLE_ACCESS,
Expand Down Expand Up @@ -382,6 +390,7 @@ private Map<String, String> getTestProperties() {
map.put(S3FileIOProperties.USE_ARN_REGION_ENABLED, "true");
map.put(S3FileIOProperties.ACCELERATION_ENABLED, "true");
map.put(S3FileIOProperties.DUALSTACK_ENABLED, "true");
map.put(S3FileIOProperties.CROSS_REGION_ACCESS_ENABLED, "true");
map.put(
S3FileIOProperties.MULTIPART_SIZE,
String.valueOf(S3FileIOProperties.MULTIPART_SIZE_DEFAULT));
Expand Down Expand Up @@ -427,6 +436,7 @@ public void testApplyCredentialConfigurations() {
public void testApplyS3ServiceConfigurations() {
Map<String, String> properties = Maps.newHashMap();
properties.put(S3FileIOProperties.DUALSTACK_ENABLED, "true");
properties.put(S3FileIOProperties.CROSS_REGION_ACCESS_ENABLED, "true");
properties.put(S3FileIOProperties.PATH_STYLE_ACCESS, "true");
properties.put(S3FileIOProperties.USE_ARN_REGION_ENABLED, "true");
// acceleration enabled has to be set to false if path style is true
Expand All @@ -438,6 +448,7 @@ public void testApplyS3ServiceConfigurations() {
ArgumentCaptor.forClass(S3Configuration.class);

Mockito.doReturn(mockA).when(mockA).dualstackEnabled(Mockito.anyBoolean());
Mockito.doReturn(mockA).when(mockA).crossRegionAccessEnabled(Mockito.anyBoolean());
Mockito.doReturn(mockA).when(mockA).serviceConfiguration(Mockito.any(S3Configuration.class));

s3FileIOProperties.applyServiceConfigurations(mockA);
Expand Down
16 changes: 16 additions & 0 deletions docs/docs/aws.md
Original file line number Diff line number Diff line change
Expand Up @@ -514,6 +514,22 @@ spark-sql --conf spark.sql.catalog.my_catalog=org.apache.iceberg.spark.SparkCata

For more details on using S3 Access Grants, please refer to [Managing access with S3 Access Grants](https://docs.aws.amazon.com/AmazonS3/latest/userguide/access-grants.html).

### S3 Cross-Region Access

S3 Cross-Region bucket access can be turned on by setting catalog property `s3.cross-region-access-enabled` to `true`.
This is turned off by default to avoid increased latency on the first S3 API call.

For example, to enable S3 Cross-Region bucket access with Spark 3.3, you can start the Spark SQL shell with:
```
spark-sql --conf spark.sql.catalog.my_catalog=org.apache.iceberg.spark.SparkCatalog \
--conf spark.sql.catalog.my_catalog.warehouse=s3://my-bucket2/my/key/prefix \
--conf spark.sql.catalog.my_catalog.type=glue \
--conf spark.sql.catalog.my_catalog.io-impl=org.apache.iceberg.aws.s3.S3FileIO \
--conf spark.sql.catalog.my_catalog.s3.cross-region-access-enabled=true
```

For more details, please refer to [Cross-Region access for Amazon S3](https://docs.aws.amazon.com/sdk-for-java/latest/developer-guide/s3-cross-region.html).

### S3 Acceleration

[S3 Acceleration](https://aws.amazon.com/s3/transfer-acceleration/) can be used to speed up transfers to and from Amazon S3 by as much as 50-500% for long-distance transfer of larger objects.
Expand Down