+      <BuilderT extends S3BaseClientBuilder<BuilderT, ClientT>, ClientT> RegionResolution.Resolution
+ configureEndpointAndRegion(BuilderT builder,
+ S3ClientCreationParameters parameters,
+ Configuration conf) throws IOException {
- // FIPs? Log it, then reject any attempt to set an endpoint
- final boolean fipsEnabled = parameters.isFipsEnabled();
- if (fipsEnabled) {
- LOG.debug("Enabling FIPS mode");
- }
- // always setting it guarantees the value is non-null,
+ final RegionResolution.Resolution resolution =
+ calculateRegion(parameters, conf);
+ LOG.debug("Region Resolution: {}", resolution);
+
+ // always setting to true or false guarantees the value is non-null,
// which tests expect.
- builder.fipsEnabled(fipsEnabled);
-
- if (endpoint != null) {
- boolean endpointEndsWithCentral =
- endpointStr.endsWith(CENTRAL_ENDPOINT);
- checkArgument(!fipsEnabled || endpointEndsWithCentral, "%s : %s",
- ERROR_ENDPOINT_WITH_FIPS,
- endpoint);
-
- // No region was configured,
- // determine the region from the endpoint.
- if (region == null) {
- region = getS3RegionFromEndpoint(endpointStr,
- endpointEndsWithCentral);
- if (region != null) {
- origin = "endpoint";
- }
- }
+ builder.fipsEnabled(resolution.isUseFips());
- // No need to override endpoint with "s3.amazonaws.com".
- // Let the client take care of endpoint resolution. Overriding
- // the endpoint with "s3.amazonaws.com" causes 400 Bad Request
- // errors for non-existent buckets and objects.
- // ref: https://github.com/aws/aws-sdk-java-v2/issues/4846
- if (!endpointEndsWithCentral) {
- builder.endpointOverride(endpoint);
- LOG.debug("Setting endpoint to {}", endpoint);
- } else {
- origin = "central endpoint with cross region access";
- LOG.debug("Enabling cross region access for endpoint {}",
- endpointStr);
- }
- }
+ if (Sdk != resolution.getMechanism()) {
- if (region != null) {
- builder.region(region);
- } else if (configuredRegion == null) {
- // no region is configured, and none could be determined from the endpoint.
- // Use US_EAST_2 as default.
- region = Region.of(AWS_S3_DEFAULT_REGION);
- builder.region(region);
- origin = "cross region access fallback";
- } else if (configuredRegion.isEmpty()) {
- // region configuration was set to empty string.
- // allow this if people really want it; it is OK to rely on this
- // when deployed in EC2.
- WARN_OF_DEFAULT_REGION_CHAIN.warn(SDK_REGION_CHAIN_IN_USE);
- LOG.debug(SDK_REGION_CHAIN_IN_USE);
- origin = "SDK region chain";
- }
- boolean isCrossRegionAccessEnabled = conf.getBoolean(AWS_S3_CROSS_REGION_ACCESS_ENABLED,
- AWS_S3_CROSS_REGION_ACCESS_ENABLED_DEFAULT);
- // s3 cross region access
- if (isCrossRegionAccessEnabled) {
- builder.crossRegionAccessEnabled(true);
+ // a region has been determined from configuration,
+ // or it is falling back to central region.
+
+ final Region region = resolution.getRegion();
+ builder.region(requireNonNull(region));
+ // s3 cross region access
+ if (resolution.isCrossRegionAccessEnabled()) {
+ builder.crossRegionAccessEnabled(true);
+ }
+ final URI endpointUri = resolution.getEndpointUri();
+ if (endpointUri != null && !resolution.isUseCentralEndpoint()) {
+ LOG.debug("Setting endpoint to {}", endpointUri);
+ builder.endpointOverride(endpointUri);
+ }
}
- LOG.debug("Setting region to {} from {} with cross region access {}",
- region, origin, isCrossRegionAccessEnabled);
+ return resolution;
}
/**
* Given a endpoint string, create the endpoint URI.
- *
+ * Kept in as subclasses use it.
* @param endpoint possibly null endpoint.
* @param conf config to build the URI from.
* @return an endpoint uri
*/
protected static URI getS3Endpoint(String endpoint, final Configuration conf) {
-
boolean secureConnections = conf.getBoolean(SECURE_CONNECTIONS, DEFAULT_SECURE_CONNECTIONS);
-
- String protocol = secureConnections ? "https" : "http";
-
- if (endpoint == null || endpoint.isEmpty()) {
- // don't set an endpoint if none is configured, instead let the SDK figure it out.
- return null;
- }
-
- if (!endpoint.contains("://")) {
- endpoint = String.format("%s://%s", protocol, endpoint);
- }
-
- try {
- return new URI(endpoint);
- } catch (URISyntaxException e) {
- throw new IllegalArgumentException(e);
- }
- }
-
- /**
- * Parses the endpoint to get the region.
- * If endpoint is the central one, use US_EAST_2.
- *
- * @param endpoint the configure endpoint.
- * @param endpointEndsWithCentral true if the endpoint is configured as central.
- * @return the S3 region, null if unable to resolve from endpoint.
- */
- @VisibleForTesting
- static Region getS3RegionFromEndpoint(final String endpoint,
- final boolean endpointEndsWithCentral) {
-
- if (!endpointEndsWithCentral) {
- // S3 VPC endpoint parsing
- Matcher matcher = VPC_ENDPOINT_PATTERN.matcher(endpoint);
- if (matcher.find()) {
- LOG.debug("Mapping to VPCE");
- LOG.debug("Endpoint {} is vpc endpoint; parsing region as {}", endpoint, matcher.group(1));
- return Region.of(matcher.group(1));
- }
-
- LOG.debug("Endpoint {} is not the default; parsing", endpoint);
- return AwsHostNameUtils.parseSigningRegion(endpoint, S3_SERVICE_NAME).orElse(null);
- }
-
- // Select default region here to enable cross-region access.
- // If both "fs.s3a.endpoint" and "fs.s3a.endpoint.region" are empty,
- // Spark sets "fs.s3a.endpoint" to "s3.amazonaws.com".
- // This applies to Spark versions with the changes of SPARK-35878.
- // ref:
- // https://github.com/apache/spark/blob/v3.5.0/core/
- // src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala#L528
- // If we do not allow cross region access, Spark would not be able to
- // access any bucket that is not present in the given region.
- // Hence, we should use default region us-east-2 to allow cross-region
- // access.
- return Region.of(AWS_S3_DEFAULT_REGION);
+ return RegionResolution.buildEndpointUri(endpoint, secureConnections);
}
  private static <BuilderT extends S3BaseClientBuilder<BuilderT, ClientT>, ClientT> void
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/NetworkBinding.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/NetworkBinding.java
index 910723f909f57..6f9c249b31a97 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/NetworkBinding.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/NetworkBinding.java
@@ -22,6 +22,7 @@
import java.lang.reflect.InvocationTargetException;
import java.net.URI;
import java.net.URISyntaxException;
+import java.util.Locale;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -99,13 +100,14 @@ public static void bindSSLChannelMode(Configuration conf,
/**
* Is this an AWS endpoint? looks at end of FQDN.
- * @param endpoint endpoint
- * @return true if the endpoint matches the requirements for an aws endpoint.
+ * @param endpoint endpoint.
+ * @return true iff the endpoint is empty or ends with ".amazonaws.com" or ".amazonaws.com.cn"
*/
public static boolean isAwsEndpoint(final String endpoint) {
+ final String host = endpoint.toLowerCase(Locale.ROOT);
return (endpoint.isEmpty()
- || endpoint.endsWith(".amazonaws.com")
- || endpoint.endsWith(".amazonaws.com.cn"));
+ || host.endsWith(".amazonaws.com")
+ || host.endsWith(".amazonaws.com.cn"));
}
/**
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RegionResolution.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RegionResolution.java
new file mode 100644
index 0000000000000..f902e1f808b69
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RegionResolution.java
@@ -0,0 +1,545 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.impl;
+
+import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.Optional;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import javax.annotation.Nullable;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import software.amazon.awssdk.awscore.util.AwsHostNameUtils;
+import software.amazon.awssdk.regions.Region;
+
+import org.apache.hadoop.classification.VisibleForTesting;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.s3a.Retries;
+import org.apache.hadoop.fs.s3a.S3ClientFactory;
+
+import static java.util.Objects.requireNonNull;
+import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_CROSS_REGION_ACCESS_ENABLED;
+import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_CROSS_REGION_ACCESS_ENABLED_DEFAULT;
+import static org.apache.hadoop.fs.s3a.Constants.CENTRAL_ENDPOINT;
+import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_SECURE_CONNECTIONS;
+import static org.apache.hadoop.fs.s3a.Constants.EMPTY_REGION;
+import static org.apache.hadoop.fs.s3a.Constants.FIPS_ENDPOINT;
+import static org.apache.hadoop.fs.s3a.Constants.SDK_REGION;
+import static org.apache.hadoop.fs.s3a.Constants.SECURE_CONNECTIONS;
+import static org.apache.hadoop.fs.s3a.impl.NetworkBinding.isAwsEndpoint;
+import static org.apache.hadoop.fs.s3a.impl.RegionResolution.RegionResolutionMechanism.ExternalEndpoint;
+import static org.apache.hadoop.fs.s3a.impl.RegionResolution.RegionResolutionMechanism.FallbackToCentral;
+import static org.apache.hadoop.util.Preconditions.checkArgument;
+import static software.amazon.awssdk.regions.Region.US_EAST_1;
+
+/**
+ * Region resolution.
+ * This is complicated and can be a source of support escalations.
+ * <p>
+ * The V1 SDK was happy to take an endpoint and
+ * work details out from there, possibly probing us-east-1 and caching
+ * the result.
+ * <p>
+ * The V2 SDK likes the signing region and endpoint to be declared.
+ * The S3A connector has tried to mimic the V1 code, but lacks some features
+ * (use of environment variables, probing of EC2 IAM details) for which
+ * the SDK is better.
+ *
+ * - If region is configured via fs.s3a.endpoint.region, use it.
+ * - If endpoint is configured via fs.s3a.endpoint, set it.
+ *   If no region is configured, try to parse the region from the endpoint.
+ * - If no region is configured, and it could not be parsed from the endpoint,
+ *   set the default region to US_EAST_1.
+ * - If the configured region is empty, fall back to the SDK resolution chain.
+ * - S3 cross-region access is enabled by default, irrespective of whether a
+ *   region or endpoint is set.
+ *
+ */
+public class RegionResolution {
+
+ protected static final Logger LOG =
+ LoggerFactory.getLogger(RegionResolution.class);
+
+ /**
+ * Service to ask SDK to parse.
+ */
+ private static final String S3_SERVICE_NAME = "s3";
+
+ /**
+ * Pattern to match vpce endpoints on.
+ */
+ private static final Pattern VPC_ENDPOINT_PATTERN =
+ Pattern.compile("^(?:.+\\.)?([a-z0-9-]+)\\.vpce\\.amazonaws\\.(?:com|com\\.cn)$");
+
+ /**
+ * Error message when an endpoint is set with FIPS enabled: {@value}.
+ */
+ @VisibleForTesting
+ public static final String ERROR_ENDPOINT_WITH_FIPS =
+ "Only S3 central endpoint cannot be set when " + FIPS_ENDPOINT + " is true";
+
+ /**
+ * Virtual hostnames MUST be used when using the FIPS endpoint.
+ */
+ public static final String FIPS_PATH_ACCESS_INCOMPATIBLE =
+ "Path style access must be disabled when " + FIPS_ENDPOINT + " is true";
+
+ /**
+ * String value for external region: {@value}.
+ */
+ public static final String EXTERNAL = "external";
+
+ /**
+ * External region, used for third party endpoints.
+ */
+ public static final Region EXTERNAL_REGION = Region.of(EXTERNAL);
+
+ private RegionResolution() {
+ }
+
+ /**
+ * How was the region resolved?
+ */
+ public enum RegionResolutionMechanism {
+
+ /** Endpoint inference. */
+ CalculatedFromEndpoint("Calculated from endpoint"),
+
+ /** It's an external endpoint */
+ ExternalEndpoint("External endpoint"),
+
+ /** No resolution: falling back to central endpoint. */
+ FallbackToCentral("Fallback to central endpoint"),
+
+ /** Connection is a VPCE endpoint which was parsed for the region. */
+ ParseVpceEndpoint("Parse VPCE Endpoint"),
+
+ /** SDK requested. */
+ Sdk("SDK resolution chain"),
+
+ /** Set in configuration. */
+ Specified("Region specified");
+
+ /**
+ * Text of the mechanism.
+ */
+ private final String mechanism;
+
+ RegionResolutionMechanism(String mechanism) {
+ this.mechanism = mechanism;
+ }
+
+ /**
+ * String value of the resolution mechanism.
+ * @return the resolution mechanism.
+ */
+ public String getMechanism() {
+ return mechanism;
+ }
+
+ @Override
+ public String toString() {
+ final StringBuilder sb = new StringBuilder("RegionResolutionMechanism{");
+ sb.append("mechanism='").append(mechanism).append('\'');
+ sb.append('}');
+ return sb.toString();
+ }
+ }
+
+ /**
+ * The resolution of a region and endpoint.
+ */
+ public static final class Resolution {
+
+ /**
+ * Region: if null hand down to the SDK.
+ */
+ private Region region;
+
+ /**
+ * How was the region resolved?
+ * Null means unresolved.
+ */
+ private RegionResolutionMechanism mechanism;
+
+ /**
+ * Should FIPS be enabled?
+ */
+ private boolean useFips;
+
+ /**
+ * Should cross-region access be enabled?
+ */
+ private boolean crossRegionAccessEnabled;
+
+ /**
+ * Endpoint as string.
+ */
+ private String endpointStr;
+
+ /**
+ * Endpoint URI.
+ */
+ private URI endpointUri;
+
+ /**
+ * Use the central endpoint?
+ */
+ private boolean useCentralEndpoint;
+
+ /** Empty constructor. */
+ public Resolution() {
+ }
+
+ /**
+ * Instantiate with a region and resolution mechanism.
+ * @param region region
+ * @param mechanism resolution mechanism.
+ */
+ public Resolution(final Region region, final RegionResolutionMechanism mechanism) {
+ this.region = region;
+ this.mechanism = mechanism;
+ }
+
+ /**
+ * Set the region.
+ * Declares the region as resolved even when the value is null (i.e. resolve to SDK).
+ * @param resolvedRegion region
+ * @param resolutionMechanism resolution mechanism.
+ * @return the builder
+ */
+ public Resolution withRegion(
+ @Nullable final Region resolvedRegion,
+ final RegionResolutionMechanism resolutionMechanism) {
+ this.region = resolvedRegion;
+ this.mechanism = requireNonNull(resolutionMechanism);
+ return this;
+ }
+
+ /**
+ * Set builder value.
+ * @param value new value
+ * @return the builder
+ */
+ public Resolution withUseFips(final boolean value) {
+ useFips = value;
+ return this;
+ }
+
+ /**
+ * Set builder value.
+ * @param value new value
+ * @return the builder
+ */
+ public Resolution withCrossRegionAccessEnabled(final boolean value) {
+ crossRegionAccessEnabled = value;
+ return this;
+ }
+
+ /**
+ * Set builder value.
+ * @param value new value
+ * @return the builder
+ */
+ public Resolution withEndpointStr(final String value) {
+ endpointStr = value;
+ return this;
+ }
+
+ /**
+ * Endpoint URI.
+ * @return value if set.
+ */
+ public URI getEndpointUri() {
+ return endpointUri;
+ }
+
+ /**
+ * Set builder value.
+ * @param value new value
+ * @return the builder
+ */
+ public Resolution withEndpointUri(final URI value) {
+ endpointUri = value;
+ return this;
+ }
+
+ /**
+ * Endpoint as string.
+ * @return value if set.
+ */
+ public String getEndpointStr() {
+ return endpointStr;
+ }
+
+ /**
+ * Region: if null hand down to the SDK.
+ * @return value if set.
+ */
+ public Region getRegion() {
+ return region;
+ }
+
+ /**
+ * Should FIPS be enabled?
+ * @return flag state.
+ */
+ public boolean isUseFips() {
+ return useFips;
+ }
+
+ /**
+ * Should cross-region access be enabled?
+ * @return flag state.
+ */
+ public boolean isCrossRegionAccessEnabled() {
+ return crossRegionAccessEnabled;
+ }
+
+ /**
+ * How was the region resolved?
+ * Null means unresolved.
+ * @return value if set.
+ */
+ public RegionResolutionMechanism getMechanism() {
+ return mechanism;
+ }
+
+ /**
+ * Is the region resolved.
+ * @return true if there's been a resolution.
+ */
+ public boolean isRegionResolved() {
+ return mechanism != null;
+ }
+
+ /**
+ * Use the central endpoint?
+ * @return flag state.
+ */
+ public boolean isUseCentralEndpoint() {
+ return useCentralEndpoint;
+ }
+
+ /**
+ * Set builder value.
+ * @param value new value
+ * @return the builder
+ */
+ public Resolution withUseCentralEndpoint(final boolean value) {
+ useCentralEndpoint = value;
+ return this;
+ }
+
+ @Override
+ public String toString() {
+ final StringBuilder sb = new StringBuilder("Resolution{");
+ sb.append("region=").append(region);
+ sb.append(", resolution=").append(mechanism);
+ sb.append(", useFips=").append(useFips);
+ sb.append(", crossRegionAccessEnabled=").append(crossRegionAccessEnabled);
+ sb.append(", endpointUri=").append(endpointUri);
+ sb.append(", useCentralEndpoint=").append(useCentralEndpoint);
+ sb.append('}');
+ return sb.toString();
+ }
+ }
+
+ /**
+ * Given a endpoint string, create the endpoint URI.
+ * @param endpoint possibly null endpoint.
+ * @param secureConnections use secure HTTPS connection?
+ * @return an endpoint uri or null if the endpoint was passed in was null/empty
+ * @throws IllegalArgumentException failure to parse the endpoint.
+ */
+ public static URI buildEndpointUri(String endpoint, final boolean secureConnections) {
+
+ String protocol = secureConnections ? "https" : "http";
+
+ if (endpoint == null || endpoint.isEmpty()) {
+ // don't set an endpoint if none is configured, instead let the SDK figure it out.
+ return null;
+ }
+
+ if (!endpoint.contains("://")) {
+ endpoint = String.format("%s://%s", protocol, endpoint);
+ }
+
+ try {
+ return new URI(endpoint);
+ } catch (URISyntaxException e) {
+ throw new IllegalArgumentException(e);
+ }
+ }
+
+ /**
+ * Parses the endpoint to get the region.
+ * If the endpoint is the central one, no region is resolved here.
+ * @param endpoint the configure endpoint.
+ * @param endpointEndsWithCentral true if the endpoint is configured as central.
+ * @return the S3 region resolution if possible from parsing the endpoint
+ */
+ @VisibleForTesting
+ public static Optional<Resolution> determineS3RegionFromEndpoint(
+ final String endpoint,
+ final boolean endpointEndsWithCentral) {
+
+ if (!endpointEndsWithCentral) {
+ // S3 VPC endpoint parsing
+ Matcher matcher = VPC_ENDPOINT_PATTERN.matcher(endpoint);
+ if (matcher.find()) {
+ LOG.debug("Mapping to VPCE");
+ LOG.debug("Endpoint {} is VPC endpoint; parsing region as {}",
+ endpoint, matcher.group(1));
+ return Optional.of(new Resolution(
+ Region.of(matcher.group(1)),
+ RegionResolutionMechanism.ParseVpceEndpoint));
+ }
+
+ LOG.debug("Endpoint {} is not the default; parsing signing region from name.", endpoint);
+ return AwsHostNameUtils.parseSigningRegion(endpoint, S3_SERVICE_NAME)
+ .map(r ->
+ new Resolution(r, RegionResolutionMechanism.CalculatedFromEndpoint));
+ }
+
+ // No resolution.
+ return Optional.empty();
+ }
+
+
+ /**
+ * Does the region name refer to an SDK region?
+ * @param configuredRegion region in the configuration
+ * @return true if this is considered to refer to an SDK region.
+ */
+ public static boolean isSdkRegion(String configuredRegion) {
+ return SDK_REGION.equalsIgnoreCase(configuredRegion)
+ || EMPTY_REGION.equalsIgnoreCase(configuredRegion);
+ }
+
+ /**
+ * Calculate the region and the final endpoint.
+ * @param parameters creation parameters
+ * @param conf configuration with other options.
+ * @return the resolved region and endpoint.
+ * @throws IOException if the client failed to communicate with the IAM service.
+ * @throws IllegalArgumentException failure to parse endpoint, or FIPS settings.
+ */
+ @Retries.OnceTranslated
+ public static Resolution calculateRegion(
+ final S3ClientFactory.S3ClientCreationParameters parameters,
+ final Configuration conf) throws IOException {
+
+ Resolution resolution = new Resolution();
+
+ // endpoint; may be null
+ final String endpointStr = parameters.getEndpoint();
+ boolean endpointDeclared = endpointStr != null && !endpointStr.isEmpty();
+ final URI endpoint;
+ if (endpointDeclared) {
+ endpoint = buildEndpointUri(endpointStr,
+ conf.getBoolean(SECURE_CONNECTIONS, DEFAULT_SECURE_CONNECTIONS));
+ } else {
+ // set to null if endpointStr is null/empty
+ endpoint = null;
+ }
+
+ final String configuredRegion = parameters.getRegion();
+
+ // If the region was configured, set it.
+ // this includes special handling of the sdk, ec2 and "" regions.
+ if (configuredRegion != null) {
+ checkArgument(!"null".equals(configuredRegion),
+ "null is region name");
+ if (isSdkRegion(configuredRegion)) {
+ resolution.withRegion(null, RegionResolutionMechanism.Sdk);
+ } else {
+ resolution.withRegion(Region.of(configuredRegion),
+ RegionResolutionMechanism.Specified);
+ }
+ }
+
+ // central endpoint if no endpoint has been set, or it is explicitly
+ // requested
+ boolean endpointEndsWithCentral = !endpointDeclared
+ || endpointStr.endsWith(CENTRAL_ENDPOINT);
+
+ if (!resolution.isRegionResolved()) {
+ // parse from the endpoint and set if calculated
+ LOG.debug("Attempting to determine region from endpoint {}; endpointEndsWithCentral={}",
+ endpointStr, endpointEndsWithCentral);
+ determineS3RegionFromEndpoint(endpointStr, endpointEndsWithCentral).ifPresent(r ->
+ resolution.withRegion(r.getRegion(), r.getMechanism()));
+ }
+
+ // cross region setting.
+ resolution.withCrossRegionAccessEnabled(
+ conf.getBoolean(AWS_S3_CROSS_REGION_ACCESS_ENABLED,
+ AWS_S3_CROSS_REGION_ACCESS_ENABLED_DEFAULT));
+
+ // fips settings.
+ final boolean fipsEnabled = parameters.isFipsEnabled();
+ resolution.withUseFips(fipsEnabled);
+ if (fipsEnabled) {
+ // validate the FIPS settings
+ checkArgument(endpoint == null || endpointEndsWithCentral,
+ "%s : %s", ERROR_ENDPOINT_WITH_FIPS, endpoint);
+ checkArgument(!parameters.isPathStyleAccess(),
+ FIPS_PATH_ACCESS_INCOMPATIBLE);
+ }
+
+ if (!resolution.isRegionResolved()) {
+ // still not resolved.
+ if (!endpointDeclared || isAwsEndpoint(endpointStr)) {
+ // still failing to resolve the region
+ // fall back to central
+ resolution.withRegion(US_EAST_1, FallbackToCentral);
+ } else {
+ // we are not resolved and not an aws region.
+ // set the region to being "external"
+ resolution.withRegion(EXTERNAL_REGION, ExternalEndpoint);
+ }
+ }
+
+ // No need to override endpoint with "s3.amazonaws.com".
+ // Let the client take care of endpoint resolution. Overriding
+ // the endpoint with "s3.amazonaws.com" causes 400 Bad Request
+ // errors for non-existent buckets and objects.
+ // ref: https://github.com/aws/aws-sdk-java-v2/issues/4846
+ if (endpointEndsWithCentral) {
+ resolution.withUseCentralEndpoint(true);
+ } else {
+ LOG.debug("Setting endpoint to {}", endpoint);
+ resolution.withEndpointStr(endpointStr)
+ .withEndpointUri(endpoint)
+ .withUseCentralEndpoint(false);
+ }
+
+ final Region r = resolution.getRegion();
+ if (r != null && !Region.regions().contains(r)) {
+ // note that the region isn't known.
+ // not an issue for third party stores, otherwise it may be a region newer than
+ // that expected by the SDK. Hence: only log at debug.
+ LOG.debug("Region {} is not recognized by this SDK", r);
+ }
+ return resolution;
+ }
+
+}
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md
index 3645a652b64cb..c1794d00841c3 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md
@@ -61,7 +61,7 @@ The S3A connector supports S3 cross region access via AWS SDK which is enabled b
Not supported:
* AWS [Snowball](https://aws.amazon.com/snowball/).
-As of December 2023, AWS S3 uses Transport Layer Security (TLS) [version 1.2](https://aws.amazon.com/blogs/security/tls-1-2-required-for-aws-endpoints/) to secure the communications channel; the S3A client is does this through
+As of December 2023, AWS S3 uses Transport Layer Security (TLS) [version 1.2](https://aws.amazon.com/blogs/security/tls-1-2-required-for-aws-endpoints/) to secure the communications channel; the S3A client does this through
the Apache [HttpClient library](https://hc.apache.org/index.html).
### Third party stores
@@ -74,80 +74,191 @@ _MUST_ be installed on the JVMs on hosts within the Hadoop cluster.
See [Working with Third-party S3 Stores](third_party_stores.html) *after* reading this document.
-## Connection Settings
+## Endpoint and Region Settings
-There are three core settings to connect to an S3 store, endpoint, region and whether or not to use path style access.
+There are three core settings to connect to an S3 store: the endpoint, the region, and whether to use path style access.
+
+The term "endpoint" means the URL or hostname of the remote s3 store.
+The default S3 endpoint is `s3.amazonaws.com`
+When a request is made to a bucket and path style access is false, the hostname to
+make HTTP requests from is prefixed to the endpoint. A bucket `example` would
+end up with a name `example.s3.amazonaws.com`.
+
+S3 Buckets are hosted in different AWS regions.
+
+Each region has its own S3 endpoint, documented [by Amazon](http://docs.aws.amazon.com/general/latest/gr/rande.html#s3_region).
+
+1. Applications running in EC2 infrastructure do not pay for IO to/from
+ *local S3 buckets*. They will be billed for access to remote buckets. Always
+ use local buckets and local copies of data, wherever possible.
+2. With the V4 signing protocol, AWS requires the explicit region endpoint
+ to be used —hence S3A must be configured to use the specific endpoint. This
+ is done by setting the region in the configuration option `fs.s3a.endpoint.region`,
+ or by explicitly setting `fs.s3a.endpoint` and `fs.s3a.endpoint.region`.
+3. All endpoints other than the default region only support interaction
+ with buckets local to that S3 instance.
+4. Standard S3 buckets support "cross-region" access where use of the original `us-east-1`
+ endpoint allows access to the data, but newer storage types, particularly S3 Express are
+ not supported.
+
+If the wrong endpoint is used, the request will fail. This may be reported as a 301/redirect error,
+or as a "400 Bad Request": take these as cues to check the endpoint setting of
+a bucket.
+
+The up-to-date list of regions is [available online](https://docs.aws.amazon.com/general/latest/gr/s3.html).
+
+Knowing the region of a bucket is key to being able to communicate and authenticate
+with it.
```xml
-<property>
-  <name>fs.s3a.endpoint</name>
-  <description>AWS S3 endpoint to connect to. An up-to-date list is
-    provided in the AWS Documentation: regions and endpoints. Without this
-    property, the endpoint/hostname of the S3 Store is inferred from
-    the value of fs.s3a.endpoint.region, fs.s3a.endpoint.fips and more.
-  </description>
-</property>
-
 <property>
   <name>fs.s3a.endpoint.region</name>
   <value>REGION</value>
-  <description>AWS Region of the data</description>
+  <description>AWS Region of the bucket</description>
 </property>

+<property>
+  <name>fs.s3a.endpoint</name>
+  <description>AWS S3 endpoint to connect to.
+    Leave blank for the SDK to determine it from the region and/or other settings.
+  </description>
+</property>
+
 <property>
   <name>fs.s3a.path.style.access</name>
   <value>false</value>
   <description>Enable S3 path style access by disabling the default virtual hosting behaviour.
-    Needed for AWS PrivateLink, S3 AccessPoints, and, generally, third party stores.
+    Needed for AWS PrivateLink, S3 AccessPoints, and third party stores.
     Default: false.
   </description>
 </property>
```
-Historically the S3A connector has preferred the endpoint as defined by the option `fs.s3a.endpoint`.
+There are also some secondary options. The `fs.s3a.endpoint.fips` option is covered in its own section;
+the option `fs.s3a.cross.region.access.enabled` is generally left alone: this SDK feature is
+often critical when configuring a cluster to work with data around the world.
+
+```xml
+<property>
+  <name>fs.s3a.cross.region.access.enabled</name>
+  <value>true</value>
+  <description>SDK to fall back to cross-region bucket access</description>
+</property>
+
+<property>
+  <name>fs.s3a.endpoint.fips</name>
+  <value>false</value>
+  <description>Use the FIPS endpoint</description>
+</property>
+```
+
+Historically the S3A connector preferred the endpoint as defined by the option `fs.s3a.endpoint`.
With the move to the AWS V2 SDK, there is more emphasis on the region, set by the `fs.s3a.endpoint.region` option.
-Normally, declaring the region in `fs.s3a.endpoint.region` should be sufficient to set up the network connection to correctly connect to an AWS-hosted S3 store.
+Normally, declaring the region in `fs.s3a.endpoint.region` should be sufficient to set up the network
+connection to correctly connect to an _AWS-hosted S3 store_.
+
+When connecting to third-party stores, the `fs.s3a.endpoint` option becomes critical;
+the value of `fs.s3a.endpoint.region` can still tune s3a client behavior.
### S3 endpoint and region settings in detail
-* Configs `fs.s3a.endpoint` and `fs.s3a.endpoint.region` are used to set values
- for S3 endpoint and region respectively.
-* If `fs.s3a.endpoint.region` is configured with valid AWS region value, S3A will
- configure the S3 client to use this value. If this is set to a region that does
- not match your bucket, you will receive a 301 redirect response.
-* If `fs.s3a.endpoint.region` is not set and `fs.s3a.endpoint` is set with valid
- endpoint value, S3A will attempt to parse the region from the endpoint and
- configure S3 client to use the region value.
-* If both `fs.s3a.endpoint` and `fs.s3a.endpoint.region` are not set, S3A will
- use `us-east-2` as default region and enable cross region access. In this case,
- S3A does not attempt to override the endpoint while configuring the S3 client.
-* If `fs.s3a.endpoint` is not set and `fs.s3a.endpoint.region` is set to an empty
- string, S3A will configure S3 client without any region or endpoint override.
- This will allow fallback to S3 SDK region resolution chain. More details
- [here](https://docs.aws.amazon.com/sdk-for-java/latest/developer-guide/region-selection.html).
-* If `fs.s3a.endpoint` is set to central endpoint `s3.amazonaws.com` and
- `fs.s3a.endpoint.region` is not set, S3A will use `us-east-2` as default region
- and enable cross region access. In this case, S3A does not attempt to override
- the endpoint while configuring the S3 client.
-* If `fs.s3a.endpoint` is set to central endpoint `s3.amazonaws.com` and
- `fs.s3a.endpoint.region` is also set to some region, S3A will use that region
- value and enable cross region access. In this case, S3A does not attempt to
- override the endpoint while configuring the S3 client.
-
-When the cross region access is enabled while configuring the S3 client, even if the
-region set is incorrect, S3 SDK determines the region. This is done by making the
-request, and if the SDK receives 301 redirect response, it determines the region at
-the cost of a HEAD request, and caches it.
-
-Please note that some endpoint and region settings that require cross region access
+1. Configuration options `fs.s3a.endpoint.region` and `fs.s3a.endpoint` are used to set values
+ for the S3 region and endpoint respectively.
+2. If `fs.s3a.endpoint.region` is configured with valid AWS region value, S3A will
+ configure the S3 client to use this value. If this is set to a region that does
+ not match your bucket, you will receive a 301 redirect response.
+3. If `fs.s3a.endpoint.region` is not set and `fs.s3a.endpoint` is set to an AWS regional endpoint,
+ S3A will determine the region by parsing the endpoint string.
+ This works for VPCE, `amazonaws.com` and `amazonaws.com.cn` endpoints.
+4. If `fs.s3a.endpoint.region` is set to `sdk` then region resolution is handled
+ by the SDK. Its process is documented
+ [here](https://docs.aws.amazon.com/sdk-for-java/latest/developer-guide/region-selection.html).
+5. If both `fs.s3a.endpoint` and `fs.s3a.endpoint.region` are unset, S3A will
+ use `us-east-1` as default region and expect cross-region access.
+6. If `fs.s3a.endpoint` is not set and `fs.s3a.endpoint.region` is set to an empty
+ string, S3A will use the SDK resolution, as when the region is set to `sdk`.
+ (This differs from point 5 in that the value is an empty string rather than unset.)
+7. If `fs.s3a.endpoint` is set to the central endpoint `s3.amazonaws.com` and
+ `fs.s3a.endpoint.region` is not set, S3A will use `us-east-1` as the default region
+ and expect cross-region access.
+
+When cross-region access is enabled, the AWS SDK determines the region when it is unknown.
+This is done by making the request; if the SDK receives a 301 redirect response, it issues
+a HEAD request to the bucket to determine its location.
+This is cached for the duration of the JVM.
+
+This cross-region resolution requires that the host performing the lookup has network access
+to the central region. If the host is in an AWS VPC which lacks such network
+access, cross region lookup will fail.
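+
+For example, the region of a bucket can be pinned with a per-bucket override, which
+avoids any cross-region lookup for that bucket. This reuses the `us-west-2-dataset`
+example bucket from the per-bucket configuration section; substitute the real bucket
+name and region:
+
+```xml
+<property>
+  <name>fs.s3a.bucket.us-west-2-dataset.endpoint.region</name>
+  <value>us-west-2</value>
+</property>
+```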
+
+Please note that some endpoint and region settings that require cross-region access
are complex and improving over time. Hence, they may be considered unstable.
-*Important:* do not use `auto`, `ec2`, or `sdk` as these may be used
-in the future for specific region-binding algorithms.
+If you are working with third party stores, please check [third party stores in detail](./third_party_stores.html).
+
+If this seems confusing: you are correct!
+
+Here is what to do:
+
+#### Deploying on EC2 and working with AWS S3 buckets mostly in the local region
+
+1. Leave `fs.s3a.endpoint` unset.
+2. Set `fs.s3a.endpoint.region` to `sdk`.
+3. Leave `fs.s3a.cross.region.access.enabled` as `true`.
+
+This hands off resolution to the SDK, which will use the EC2 instance metadata to determine the local region.
+The SDK will use this to build the endpoint URL and sign all requests.
+
+Remote buckets will be accessed via probes to `s3.amazonaws.com`, relying on
+cross-region access to resolve their location.
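+
+A minimal sketch of these settings; `fs.s3a.cross.region.access.enabled` is shown
+explicitly even though `true` is already its default:
+
+```xml
+<property>
+  <name>fs.s3a.endpoint.region</name>
+  <value>sdk</value>
+</property>
+
+<property>
+  <name>fs.s3a.cross.region.access.enabled</name>
+  <value>true</value>
+</property>
+```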
+
+
+#### On-prem access to AWS S3 where the bucket region is known
+
+1. Leave `fs.s3a.endpoint` unset.
+2. Set `fs.s3a.endpoint.region` to the region of the bucket.
+
+The AWS SDK will choose the correct endpoint for the bucket region and sign requests
+appropriately.
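+
+A minimal sketch, using `eu-west-2` as a stand-in for whatever region the bucket
+actually lives in:
+
+```xml
+<property>
+  <name>fs.s3a.endpoint.region</name>
+  <value>eu-west-2</value>
+</property>
+```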
+
+#### On-prem access to AWS S3 where the bucket region is **not** known
+
+1. Leave `fs.s3a.endpoint` unset.
+2. Set `fs.s3a.endpoint.region` to `sdk`
+3. Leave `fs.s3a.cross.region.access.enabled` as `true`.
+
+The AWS SDK will attempt to connect to the bucket via the central `s3.amazonaws.com` region;
+if it is elsewhere it will determine the correct location.
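+
+A minimal sketch, here scoped to a single (hypothetical) bucket `example` through a
+per-bucket override rather than the filesystem-wide default:
+
+```xml
+<property>
+  <name>fs.s3a.bucket.example.endpoint.region</name>
+  <value>sdk</value>
+</property>
+```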
+
+#### On-prem access to AWS S3 through VPCE
+
+1. Set `fs.s3a.endpoint` to the VPCE endpoint
+2. Set `fs.s3a.endpoint.region` to the region of the bucket, *or leave unset*
+3. Set `fs.s3a.path.style.access` to `true`.
+
+```xml
+<property>
+  <name>fs.s3a.bucket.example.endpoint</name>
+  <value>https://bucket.vpce-05ba4f2400000-x92g7xzc.s3.us-west-2.vpce.amazonaws.com/</value>
+</property>
+
+<property>
+  <name>fs.s3a.bucket.example.path.style.access</name>
+  <value>true</value>
+</property>
+```
+
+#### Third party stores
+
+See [Third Party Stores](./third_party_stores.html) for the full details and example settings.
+* Set `fs.s3a.endpoint` to the full URL of the service, or, if it supports virtual hostnames,
+to the domain name to which virtual hosts are prefixed.
+* Set `fs.s3a.endpoint.region` to `external`.
+* If working with an HTTP endpoint, set `fs.s3a.connection.ssl.enabled` to false (see the example sketch below).
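+
+A minimal illustrative sketch for a hypothetical third-party bucket `example` served
+over plain HTTP at `store.example.org` (both names are placeholders; see the linked
+document for settings against real stores):
+
+```xml
+<property>
+  <name>fs.s3a.bucket.example.endpoint</name>
+  <value>http://store.example.org</value>
+</property>
+
+<property>
+  <name>fs.s3a.bucket.example.endpoint.region</name>
+  <value>external</value>
+</property>
+
+<property>
+  <name>fs.s3a.bucket.example.path.style.access</name>
+  <value>true</value>
+</property>
+
+<property>
+  <name>fs.s3a.bucket.example.connection.ssl.enabled</name>
+  <value>false</value>
+</property>
+```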
-If you are working with third party stores, please check [third party stores in detail](third_party_stores.html).
### Network timeouts
@@ -285,6 +396,12 @@ Core aspects of pool settings are:
```
+Using OpenSSL is 5-10% faster than the Java 8 TLS implementation; that is, *SQL queries complete faster*.
+
+It is hard to set up and a bit brittle, but if possible, use it!
+
+
+
### Proxy Settings
Connections to S3A stores can be made through an HTTP or HTTPS proxy.
@@ -343,38 +460,6 @@ if long-lived connections have problems.
## Using Per-Bucket Configuration to access data round the world
-S3 Buckets are hosted in different "regions", the default being "US-East-1".
-The S3A client talks to this region by default, issuing HTTP requests
-to the server `s3.amazonaws.com`.
-
-S3A can work with buckets from any region. Each region has its own
-S3 endpoint, documented [by Amazon](http://docs.aws.amazon.com/general/latest/gr/rande.html#s3_region).
-
-1. Applications running in EC2 infrastructure do not pay for IO to/from
-*local S3 buckets*. They will be billed for access to remote buckets. Always
-use local buckets and local copies of data, wherever possible.
-2. With the V4 signing protocol, AWS requires the explicit region endpoint
-to be used —hence S3A must be configured to use the specific endpoint. This
-is done by setting the regon in the configuration option `fs.s3a.endpoint.region`,
-or by explicitly setting `fs.s3a.endpoint` and `fs.s3a.endpoint.region`.
-3. All endpoints other than the default region only support interaction
-with buckets local to that S3 instance.
-4. Standard S3 buckets support "cross-region" access where use of the original `us-east-1`
- endpoint allows access to the data, but newer storage types, particularly S3 Express are
- not supported.
-
-
-
-If the wrong endpoint is used, the request will fail. This may be reported as a 301/redirect error,
-or as a 400 Bad Request: take these as cues to check the endpoint setting of
-a bucket.
-
-The up to date list of regions is [Available online](https://docs.aws.amazon.com/general/latest/gr/s3.html).
-
-This list can be used to specify the endpoint of individual buckets, for example
-for buckets in the us-west-2 and EU/Ireland endpoints.
-
-
```xml
fs.s3a.bucket.us-west-2-dataset.endpoint.region
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/third_party_stores.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/third_party_stores.md
index 0336efa677c0b..f1b4f3ed3fb8c 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/third_party_stores.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/third_party_stores.md
@@ -102,7 +102,7 @@ AWS SDK requires the name of a region is supplied for signing, and that region m
Third-party stores don't normally care about the name of a region, *only that a region is supplied*.
You should set `fs.s3a.endpoint.region` to anything except the following reserved names: `sdk`, `ec2` and `auto`.
-We have plans for those.
+Recommended: `external`
## Other issues
@@ -445,7 +445,6 @@ The S3A client's creation of an endpoint URL generates an unknown host.
```
-
```
ls: software.amazon.awssdk.core.exception.SdkClientException:
Received an UnknownHostException when attempting to interact with a service.
@@ -491,7 +490,7 @@ at [ECS Test Drive](https://portal.ecstestdrive.com/) were
```xml
   <name>fs.s3a.endpoint.region</name>
-  <value>dell</value>
+  <value>external</value>
   <description>arbitrary name other than sdk, ec2, auto or null</description>
@@ -564,7 +563,7 @@ this makes renaming and deleting significantly slower.
   <name>fs.s3a.endpoint.region</name>
-  <value>gcs</value>
+  <value>external</value>
@@ -640,3 +639,67 @@ It is also a way to regression test foundational S3A third-party store compatibi
_Note_ If anyone is set up to test this regularly, please let the hadoop developer team know if regressions do surface,
as it is not a common test configuration.
We do use it to help test compatibility during SDK updates.
+
+## RustFS localhost with no https
+
+RustFS is an easy to deploy S3 store.
+
+In tests of the S3A connector in December 2025 we observed:
+1. Eventual consistency in path deletion (LIST responses included recently deleted objects; HEAD correctly returned 404)
+2. Eventual consistency in lists of multipart object uploads (`s3guard uploads` command, *and* s3a committer cleanup)
+3. Case inconsistency when running on a MacOS system; not tested elsewhere.
+4. Other minor issues in niche API calls (`getBucketMetadata()`) which don't affect normal use.
+
+Listing inconsistency after directory deletion is the key issue which may break applications, as it means that:
+* Newly deleted directories may still return objects.
+* Newly renamed objects may still be listable at the source paths.
+* The logic which determines whether an empty directory marker should be reinserted after a child path deletion may not behave correctly.
+
+It may be safe for use with tables which are designed to work on inconsistent object stores (Apache Iceberg and rivals), but
+it does not, as of December 2025, appear safe for use with classic Hive directory-structured tables, through Hive, Spark or other applications.
+Nor are the S3A committers guaranteed to work safely.
+
+Use at your own risk. Running the `hadoop-aws` test suite against your store would be the ideal way to see if later
+versions have changed their behavior.
+
+Example settings for a local RustFS bucket. Note that `fs.s3a.bucket.rustybucket.connection.ssl.enabled` has been set to false,
+as the SDK doesn't look at the http/https prefix of the endpoint to determine which protocol to use.
+
+```xml
+<property>
+  <name>fs.s3a.bucket.rustybucket.access.key</name>
+  <value>rustfsadmin</value>
+</property>
+
+<property>
+  <name>fs.s3a.bucket.rustybucket.secret.key</name>
+  <value>rustfsadmin</value>
+</property>
+
+<property>
+  <name>fs.s3a.bucket.rustybucket.endpoint</name>
+  <value>http://localhost:9000</value>
+</property>
+
+<property>
+  <name>fs.s3a.bucket.rustybucket.connection.ssl.enabled</name>
+  <value>false</value>
+</property>
+
+<property>
+  <name>fs.s3a.bucket.rustybucket.endpoint.region</name>
+  <value>external</value>
+</property>
+
+<property>
+  <name>fs.s3a.bucket.rustybucket.path.style.access</name>
+  <value>true</value>
+</property>
+
+<property>
+  <name>fs.s3a.bucket.rustybucket.create.conditional.enabled</name>
+  <value>false</value>
+</property>
+```
+
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java
index 81a295345a8fc..31ab2668b558a 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java
@@ -27,6 +27,7 @@
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.test.AbstractHadoopTestBase;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
@@ -35,7 +36,7 @@
/**
* Abstract base class for S3A unit tests using a mock S3 client.
*/
-public abstract class AbstractS3AMockTest {
+public abstract class AbstractS3AMockTest extends AbstractHadoopTestBase {
protected static final String BUCKET = "mock-bucket";
protected static final AwsServiceException NOT_FOUND =
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java
index 25efe7a06e5ae..7259526b2f88a 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java
@@ -29,8 +29,10 @@
import org.assertj.core.api.Assertions;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.Test;
+import org.opentest4j.AssertionFailedError;
import software.amazon.awssdk.awscore.AwsExecutionAttribute;
import software.amazon.awssdk.awscore.exception.AwsServiceException;
+import software.amazon.awssdk.core.exception.SdkClientException;
import software.amazon.awssdk.core.interceptor.Context;
import software.amazon.awssdk.core.interceptor.ExecutionAttributes;
import software.amazon.awssdk.core.interceptor.ExecutionInterceptor;
@@ -54,11 +56,12 @@
import static org.apache.hadoop.fs.s3a.Constants.FIPS_ENDPOINT;
import static org.apache.hadoop.fs.s3a.Constants.PATH_STYLE_ACCESS;
import static org.apache.hadoop.fs.s3a.Constants.S3_ENCRYPTION_ALGORITHM;
-import static org.apache.hadoop.fs.s3a.DefaultS3ClientFactory.ERROR_ENDPOINT_WITH_FIPS;
+import static org.apache.hadoop.fs.s3a.Constants.SDK_REGION;
import static org.apache.hadoop.fs.s3a.S3ATestUtils.assume;
import static org.apache.hadoop.fs.s3a.S3ATestUtils.assumeNotS3ExpressFileSystem;
import static org.apache.hadoop.fs.s3a.S3ATestUtils.assumeStoreAwsHosted;
import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides;
+import static org.apache.hadoop.fs.s3a.impl.RegionResolution.ERROR_ENDPOINT_WITH_FIPS;
import static org.apache.hadoop.fs.s3a.test.PublicDatasetTestUtils.DEFAULT_REQUESTER_PAYS_BUCKET_NAME;
import static org.apache.hadoop.io.IOUtils.closeStream;
import static org.apache.hadoop.test.LambdaTestUtils.intercept;
@@ -113,6 +116,9 @@ public class ITestS3AEndpointRegion extends AbstractS3ATestBase {
* Text to include in assertions.
*/
  private static final AtomicReference<String> EXPECTED_MESSAGE = new AtomicReference<>();
+
+ public static final String INCORRECT_REGION_SET = "Incorrect region set";
+
/**
* New FS instance which will be closed in teardown.
*/
@@ -223,6 +229,32 @@ public void testWithRegionConfig() throws Throwable {
expectInterceptorException(client);
}
+ /**
+ * This hands off resolution to the SDK which may fail if nothing can be found
+ * (non-EC2; no AWS_REGION env var or setting in {@code ~/.aws/config}).
+ * There's separate handling for the different failure modes so this
+ * test will work in all deployments.
+ */
+ @Test
+ public void testWithSDKRegionConfig() throws Throwable {
+ describe("Create a client with an SDK region");
+ Configuration conf = getConfiguration();
+
+ try {
+ S3Client client = createS3Client(conf, CENTRAL_ENDPOINT, SDK_REGION, null, false);
+
+ expectInterceptorException(client);
+ } catch (SdkClientException e) {
+ Assertions.assertThat(e)
+ .describedAs("Exception raised due to unable to resolve region")
+ .hasMessageContaining("region");
+ } catch (AssertionFailedError e) {
+ Assertions.assertThat(e)
+ .describedAs("Exception raised region resolution working on local system")
+ .hasMessageContaining(INCORRECT_REGION_SET);
+ }
+ }
+
@Test
public void testWithFips() throws Throwable {
describe("Create a client with fips enabled");
@@ -646,7 +678,7 @@ public void beforeExecution(Context.BeforeExecution context,
}
Assertions.assertThat(reg)
- .describedAs("Incorrect region set in %s. Client Config=%s",
+ .describedAs(INCORRECT_REGION_SET + " in %s. Client Config=%s",
state, EXPECTED_MESSAGE.get())
.isEqualTo(region);
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AEndpointParsing.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AEndpointParsing.java
index 8be0708cad542..3c234381d1c55 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AEndpointParsing.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AEndpointParsing.java
@@ -18,10 +18,14 @@
package org.apache.hadoop.fs.s3a;
+import java.util.Optional;
+
import org.assertj.core.api.Assertions;
import org.junit.jupiter.api.Test;
import software.amazon.awssdk.regions.Region;
+import org.apache.hadoop.fs.s3a.impl.RegionResolution;
+
public class TestS3AEndpointParsing extends AbstractS3AMockTest {
private static final String VPC_ENDPOINT = "vpce-1a2b3c4d-5e6f.s3.us-west-2.vpce.amazonaws.com";
@@ -29,15 +33,21 @@ public class TestS3AEndpointParsing extends AbstractS3AMockTest {
private static final String US_WEST_2 = "us-west-2";
private static final String EU_WEST_1 = "eu-west-1";
- @Test
- public void testVPCEndpoint() {
- Region region = DefaultS3ClientFactory.getS3RegionFromEndpoint(VPC_ENDPOINT, false);
- Assertions.assertThat(region).isEqualTo(Region.of(US_WEST_2));
- }
-
- @Test
- public void testNonVPCEndpoint() {
- Region region = DefaultS3ClientFactory.getS3RegionFromEndpoint(NON_VPC_ENDPOINT, false);
- Assertions.assertThat(region).isEqualTo(Region.of(EU_WEST_1));
- }
+ @Test
+ public void testVPCEndpoint() {
+ Optional<RegionResolution.Resolution>
+ region = RegionResolution.determineS3RegionFromEndpoint(VPC_ENDPOINT, false);
+ Assertions.assertThat(region).get()
+ .extracting(RegionResolution.Resolution::getRegion)
+ .isEqualTo(Region.of(US_WEST_2));
+ }
+
+ @Test
+ public void testNonVPCEndpoint() {
+ Optional<RegionResolution.Resolution>
+ region = RegionResolution.determineS3RegionFromEndpoint(NON_VPC_ENDPOINT, false);
+ Assertions.assertThat(region).get()
+ .extracting(RegionResolution.Resolution::getRegion)
+ .isEqualTo(Region.of(EU_WEST_1));
+ }
}
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestRegionResolution.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestRegionResolution.java
new file mode 100644
index 0000000000000..f26ddd3403def
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestRegionResolution.java
@@ -0,0 +1,318 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.impl;
+
+import java.io.IOException;
+
+import org.assertj.core.api.Assertions;
+import org.junit.jupiter.api.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import software.amazon.awssdk.regions.Region;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.s3a.S3ClientFactory;
+import org.apache.hadoop.test.AbstractHadoopTestBase;
+
+import static org.apache.hadoop.fs.s3a.Constants.CENTRAL_ENDPOINT;
+import static org.apache.hadoop.fs.s3a.Constants.SDK_REGION;
+import static org.apache.hadoop.fs.s3a.impl.RegionResolution.ERROR_ENDPOINT_WITH_FIPS;
+import static org.apache.hadoop.fs.s3a.impl.RegionResolution.calculateRegion;
+import static org.apache.hadoop.test.LambdaTestUtils.intercept;
+
+/**
+ * Test region resolution logic in {@link RegionResolution}.
+ * These are based on {@code ITestS3AEndpointRegion}.
+ */
+public class TestRegionResolution extends AbstractHadoopTestBase {
+
+ private static final Logger LOG =
+ LoggerFactory.getLogger(TestRegionResolution.class);
+
+ private static final String US_EAST_1 = "us-east-1";
+
+ private static final String US_EAST_2 = "us-east-2";
+
+ private static final String US_WEST_2 = "us-west-2";
+
+ private static final String EU_WEST_2 = "eu-west-2";
+
+ private static final String CN_NORTHWEST_1 = "cn-northwest-1";
+
+ private static final String US_GOV_EAST_1 = "us-gov-east-1";
+
+ private static final String EU_WEST_2_ENDPOINT = "s3.eu-west-2.amazonaws.com";
+
+ private static final String CN_ENDPOINT = "s3.cn-northwest-1.amazonaws.com.cn";
+
+ private static final String GOV_ENDPOINT = "s3-fips.us-gov-east-1.amazonaws.com";
+
+ private static final String VPC_ENDPOINT = "vpce-1a2b3c4d-5e6f.s3.us-west-2.vpce.amazonaws.com";
+
+ private static final String CN_VPC_ENDPOINT =
+ "vpce-1a2b3c4d-5e6f.s3.cn-northwest-1.vpce.amazonaws.com.cn";
+
+
+ private Configuration getConfiguration() {
+ return new Configuration(false);
+ }
+
+ /**
+ * Describe a test. This is a replacement for javadocs
+ * where the test's role is printed in the log output.
+ * @param text description
+ */
+ protected void describe(String text) {
+ LOG.info(text);
+ }
+
+ private RegionResolution.Resolution resolve(Configuration conf,
+ String endpoint,
+ String configuredRegion,
+ boolean isFips,
+ String expectedRegion,
+ final RegionResolution.RegionResolutionMechanism expectedMechanism) throws IOException {
+ S3ClientFactory.S3ClientCreationParameters parameters =
+ new S3ClientFactory.S3ClientCreationParameters()
+ .withEndpoint(endpoint)
+ .withRegion(configuredRegion)
+ .withFipsEnabled(isFips);
+ final RegionResolution.Resolution resolved = calculateRegion(parameters, conf);
+
+ // check the region
+ if (expectedRegion != null) {
+ Assertions.assertThat(resolved.getRegion())
+ .describedAs("Resolved region %s", resolved)
+ .isNotNull()
+ .isEqualTo(Region.of(expectedRegion));
+ } else {
+ Assertions.assertThat(resolved.getRegion())
+ .describedAs("Resolved region %s", resolved)
+ .isNull();
+ }
+
+ // supplied resolution
+ if (expectedMechanism != null) {
+ assertMechanism(expectedMechanism, resolved);
+ }
+ return resolved;
+ }
+
+ /**
+ * Assert that a resolution used a specific mechanism.
+ * @param expectedMechanism expected mechanism.
+ * @param resolved resolved region
+ */
+ private static void assertMechanism(
+ final RegionResolution.RegionResolutionMechanism expectedMechanism,
+ final RegionResolution.Resolution resolved) {
+ Assertions.assertThat(resolved.getMechanism())
+ .describedAs("Resolution mechanism of %s", resolved)
+ .isEqualTo(expectedMechanism);
+ }
+
+ @Test
+ public void testWithVPCE() throws IOException {
+ resolve(getConfiguration(), VPC_ENDPOINT, null, false, US_WEST_2,
+ RegionResolution.RegionResolutionMechanism.ParseVpceEndpoint);
+ }
+
+ @Test
+ public void testWithChinaVPCE() throws IOException {
+ final RegionResolution.Resolution r =
+ resolve(getConfiguration(), CN_VPC_ENDPOINT, null, false,
+ CN_NORTHWEST_1, RegionResolution.RegionResolutionMechanism.ParseVpceEndpoint);
+ assertEndpoint(r, CN_VPC_ENDPOINT);
+ assertUseCentralValue(r, false);
+ }
+
+ @Test
+ public void testCentralEndpointNoRegion() throws IOException {
+ final RegionResolution.Resolution r =
+ resolve(getConfiguration(), CENTRAL_ENDPOINT, null, false,
+ US_EAST_1,
+ RegionResolution.RegionResolutionMechanism.FallbackToCentral);
+ assertEndpoint(r, null);
+ assertUseCentralValue(r, true);
+ }
+
+ @Test
+ public void testCentralEndpointWithRegion() throws IOException {
+ final RegionResolution.Resolution r =
+ resolve(getConfiguration(), CENTRAL_ENDPOINT, US_WEST_2, false,
+ US_WEST_2, RegionResolution.RegionResolutionMechanism.Specified);
+ assertEndpoint(r, null);
+ assertUseCentralValue(r, true);
+ }
+
+ @Test
+ public void testConfiguredRegion() throws IOException {
+ final RegionResolution.Resolution r =
+ resolve(getConfiguration(), null, EU_WEST_2, false,
+ EU_WEST_2, RegionResolution.RegionResolutionMechanism.Specified);
+ // this still uses the central endpoint.
+ assertEndpoint(r, null);
+ assertUseCentralValue(r, true);
+ }
+
+ @Test
+ public void testSDKRegion() throws IOException {
+ final RegionResolution.Resolution r =
+ resolve(getConfiguration(), null, SDK_REGION, false,
+ null, RegionResolution.RegionResolutionMechanism.Sdk);
+ // SDK handles endpoint logic.
+ assertEndpoint(r, null);
+ assertUseCentralValue(r, true);
+ }
+
+ @Test
+ public void testSDKUpperCaseRegion() throws IOException {
+ final RegionResolution.Resolution r =
+ resolve(getConfiguration(), null, "SDK", false,
+ null, RegionResolution.RegionResolutionMechanism.Sdk);
+ // SDK handles endpoint logic.
+ assertEndpoint(r, null);
+ assertUseCentralValue(r, true);
+ }
+
+ @Test
+ public void testEmptyStringRegion() throws IOException {
+ final RegionResolution.Resolution r =
+ resolve(getConfiguration(), null, "", false,
+ null, RegionResolution.RegionResolutionMechanism.Sdk);
+ // SDK handles endpoint logic.
+ assertEndpoint(r, null);
+ assertUseCentralValue(r, true);
+ }
+
+ @Test
+ public void testWithFipsNoEndpoint() throws IOException {
+ describe("Create a client with fips enabled");
+
+ resolve(getConfiguration(),
+ null, EU_WEST_2, true,
+ EU_WEST_2, RegionResolution.RegionResolutionMechanism.Specified);
+ }
+
+ /**
+ * Attempting to create a client with fips enabled and an endpoint specified
+ * fails during client construction.
+ */
+ @Test
+ public void testWithFipsAndEndpoint() throws Exception {
+ describe("Create a client with fips and an endpoint");
+
+ intercept(IllegalArgumentException.class, ERROR_ENDPOINT_WITH_FIPS, () ->
+ resolve(getConfiguration(), US_WEST_2, null, true, US_EAST_1, null));
+ }
+
+ @Test
+ public void testWithRegionConfig() throws IOException {
+ describe("Create a client with a configured region");
+
+ resolve(getConfiguration(), null, EU_WEST_2, false,
+ EU_WEST_2, RegionResolution.RegionResolutionMechanism.Specified);
+ }
+
+ @Test
+ public void testEUWest2Endpoint() throws IOException {
+ describe("specifying an eu-west-2 endpoint selects that region");
+
+ resolve(getConfiguration(), EU_WEST_2_ENDPOINT, null, false,
+ EU_WEST_2, RegionResolution.RegionResolutionMechanism.CalculatedFromEndpoint);
+ }
+
+ @Test
+ public void testWithRegionAndEndpointConfig() throws IOException {
+ describe("Test that when both region and endpoint are configured, region takes precedence");
+
+ resolve(getConfiguration(), EU_WEST_2_ENDPOINT, US_WEST_2, false,
+ US_WEST_2, RegionResolution.RegionResolutionMechanism.Specified);
+ }
+
+ @Test
+ public void testWithChinaEndpoint() throws IOException {
+ describe("Test with a china endpoint");
+ final RegionResolution.Resolution r =
+ resolve(getConfiguration(), CN_ENDPOINT, null, false,
+ CN_NORTHWEST_1,
+ RegionResolution.RegionResolutionMechanism.CalculatedFromEndpoint);
+ assertEndpoint(r, CN_ENDPOINT);
+ assertUseCentralValue(r, false);
+ }
+
+ @Test
+ public void testWithGovCloudEndpoint() throws IOException {
+ describe("Test with a gov cloud endpoint");
+ final RegionResolution.Resolution r =
+ resolve(getConfiguration(), GOV_ENDPOINT, null, false,
+ US_GOV_EAST_1,
+ RegionResolution.RegionResolutionMechanism.CalculatedFromEndpoint);
+ assertEndpoint(r, GOV_ENDPOINT);
+ assertUseCentralValue(r, false);
+ }
+
+ @Test
+ public void testNullIsForbidden() throws Throwable {
+ describe("The region null is forbidden as a red flag of configuration problems");
+ intercept(IllegalArgumentException.class, () ->
+ resolve(getConfiguration(), null, "null", false,
+ null, null));
+ }
+
+ @Test
+ public void testGcsRegion() throws Throwable {
+ resolve(getConfiguration(), "https://storage.googleapis.com", null, false,
+ RegionResolution.EXTERNAL,
+ RegionResolution.RegionResolutionMechanism.ExternalEndpoint);
+ }
+
+ @Test
+ public void testLocalhostRegion() throws Throwable {
+ resolve(getConfiguration(), "127.0.0.1", null, false,
+ RegionResolution.EXTERNAL,
+ RegionResolution.RegionResolutionMechanism.ExternalEndpoint);
+ }
+
+ /**
+ * Assert that an endpoint matches the expected value.
+ * @param r resolution
+ * @param expected expected value.
+ */
+ private static void assertEndpoint(final RegionResolution.Resolution r,
+ final String expected) {
+ Assertions.assertThat(r.getEndpointStr())
+ .describedAs("Endpoint of %s", r)
+ .isEqualTo(expected);
+ }
+
+ /**
+ * Assert that the resolution {@code isUseCentralEndpoint()} value
+ * matches that expected.
+ * @param r resolution
+ * @param expected expected value.
+ */
+ private static void assertUseCentralValue(final RegionResolution.Resolution r,
+ final boolean expected) {
+ Assertions.assertThat(r.isUseCentralEndpoint())
+ .describedAs("Endpoint of %s", r)
+ .isEqualTo(expected);
+ }
+
+}
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/tools/ITestBucketTool.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/tools/ITestBucketTool.java
index bbe9d74824b7a..0e7fc76cc9054 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/tools/ITestBucketTool.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/tools/ITestBucketTool.java
@@ -35,10 +35,12 @@
import org.apache.hadoop.util.ExitUtil;
import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.assume;
import static org.apache.hadoop.fs.s3a.S3ATestUtils.assumeNotS3ExpressFileSystem;
import static org.apache.hadoop.fs.s3a.S3ATestUtils.assumeS3ExpressFileSystem;
import static org.apache.hadoop.fs.s3a.S3ATestUtils.assumeStoreAwsHosted;
import static org.apache.hadoop.fs.s3a.S3ATestUtils.expectErrorCode;
+import static org.apache.hadoop.fs.s3a.impl.RegionResolution.isSdkRegion;
import static org.apache.hadoop.fs.s3a.impl.S3ExpressStorage.STORE_CAPABILITY_S3_EXPRESS_STORAGE;
import static org.apache.hadoop.fs.s3a.tools.BucketTool.CREATE;
import static org.apache.hadoop.fs.s3a.tools.BucketTool.NO_ZONE_SUPPLIED;
@@ -142,6 +144,9 @@ public void testRecreateTestBucketS3Express() throws Throwable {
public void testRecreateTestBucketNonS3Express() throws Throwable {
assumeNotS3ExpressFileSystem(fs);
assumeStoreAwsHosted(fs);
+ // skip the test if region resolution is handed off to the SDK
+ assume("Skipping as SDK region logic active",
+ !isSdkRegion(region));
intercept(AWSBadRequestException.class, OWNED,
() -> bucketTool.exec("bucket", d(CREATE),
d(OPT_REGION), region,