Skip to content
Merged
Show file tree
Hide file tree
Changes from 28 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
43e0967
HADOOP-19654. Upgrade AWS SDK to 2.32.23
steveloughran Aug 18, 2025
78bd12d
HADOOP-19654. Restore support for third party stores.
steveloughran Aug 20, 2025
a7f022a
HADOOP-19592. S3A: S3 Express bucket failure of conditional overwrite…
steveloughran Aug 21, 2025
befeb67
HADOOP-19003. S3A Assume role tests failing against S3Express stores
steveloughran Aug 21, 2025
7eeb4b5
HADOOP-19654. create session now always seems to get called
steveloughran Aug 21, 2025
80813ad
HADOOP-19654. S3A: AWS SDK to 2.33.8
steveloughran Sep 15, 2025
e7bd5a5
HADOOP-19654. SDK update: ITestAWSStatisticCollection fails against s…
steveloughran Sep 17, 2025
0ff9c0b
HADOOP-19654. review comments
steveloughran Sep 18, 2025
f21c0f8
HADOOP-19654. S3A: options to disable checksum calculation & restore …
steveloughran Oct 14, 2025
44dc59b
HADOOP-19654. third party stores
steveloughran Oct 15, 2025
9d6589c
HADOOP-19654. third party stores and signing
steveloughran Oct 15, 2025
79b52bf
HADOOP-19654. third party stores and MPU commit retries
steveloughran Oct 16, 2025
0ff1bde
HADOOP-19654. SDK upgrade: S3 Express
steveloughran Oct 16, 2025
a119205
Cloudstore: etag command; new version
steveloughran Oct 21, 2025
c3a58aa
HADOOP-19654. Tests to skip as appropriate when MPU is disabled.
steveloughran Oct 21, 2025
7fec7de
HADOOP-19654. Tests to skip as appropriate when MPU is disabled.
steveloughran Oct 21, 2025
8d6c619
HADOOP-19654. Yetus
steveloughran Oct 22, 2025
5fd7aa8
HADOOP-19654. get the right config for behaviour
steveloughran Oct 23, 2025
11e8068
HADOOP-19654. remove intermittent and needless assertion failure
steveloughran Oct 23, 2025
71133a5
HADOOP-19654. Upgrade SDK version to 2.35.4
steveloughran Oct 24, 2025
5fa39b2
HADOOP-19654. Transient test failure due to cached fs not having perf…
steveloughran Oct 24, 2025
f1f85a3
HADOOP-19654. SDK checksum stabilisation
steveloughran Oct 24, 2025
b0217b1
HADOOP-19654. SDK checksum doc/javadoc tuning
steveloughran Oct 24, 2025
8e67003
HADOOP-19654. Fix AbstractContractUnbufferTest to read reliably
steveloughran Oct 24, 2025
6416c20
HADOOP-19654. yetus feedback on checksum changes
steveloughran Nov 3, 2025
f624ea8
HADOOP-19654. cleanup
steveloughran Nov 4, 2025
9a5ee3e
HADOOP-19654. ITestConnectionTimeouts requires classic stream.
steveloughran Nov 4, 2025
ff3fade
HADOOP-19654. Can't use path access and FIPS endpoint (documented; no…
steveloughran Nov 4, 2025
8c58e94
HADOOP-19654. SDK update: doc comments
steveloughran Nov 6, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions LICENSE-binary
Original file line number Diff line number Diff line change
Expand Up @@ -352,7 +352,7 @@ io.reactivex:rxnetty:0.4.20
io.swagger:swagger-annotations:1.5.4
javax.inject:javax.inject:1
net.java.dev.jna:jna:5.2.0
net.minidev:accessors-smart:1.2
net.minidev:accessors-smart:1.21
org.apache.avro:avro:1.11.4
org.apache.commons:commons-compress:1.26.1
org.apache.commons:commons-configuration2:2.10.1
Expand Down Expand Up @@ -419,7 +419,7 @@ org.xerial.snappy:snappy-java:1.1.10.4
org.yaml:snakeyaml:2.0
org.wildfly.openssl:wildfly-openssl:2.2.5.Final
ro.isdc.wro4j:wro4j-maven-plugin:1.8.0
software.amazon.awssdk:bundle:2.29.52
software.amazon.awssdk:bundle:2.35.4
software.amazon.s3.analyticsaccelerator:analyticsaccelerator-s3:1.3.0
net.jodah:failsafe:2.4.4

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

package org.apache.hadoop.fs.contract;

import org.assertj.core.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

Expand All @@ -30,6 +31,7 @@

import static org.apache.hadoop.fs.contract.ContractTestUtils.createFile;
import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset;
import static org.apache.hadoop.fs.contract.ContractTestUtils.readNBytes;

/**
* Contract tests for {@link org.apache.hadoop.fs.CanUnbuffer#unbuffer}.
Expand Down Expand Up @@ -145,10 +147,12 @@ protected void validateFileContents(FSDataInputStream stream, int length,
int startIndex)
throws IOException {
byte[] streamData = new byte[length];
assertEquals(length, stream.read(streamData),
"failed to read expected number of bytes from "
+ "stream. This may be transient");
final int read = readNBytes(stream, streamData, 0, length);
Assertions.assertThat(read)
.describedAs("failed to read expected number of bytes from stream. %s", stream)
.isEqualTo(length);
byte[] validateFileBytes;

if (startIndex == 0 && length == fileBytes.length) {
validateFileBytes = fileBytes;
} else {
Expand Down
2 changes: 1 addition & 1 deletion hadoop-project/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@
<make-maven-plugin.version>1.0-beta-1</make-maven-plugin.version>
<surefire.fork.timeout>900</surefire.fork.timeout>
<aws-java-sdk.version>1.12.720</aws-java-sdk.version>
<aws-java-sdk-v2.version>2.29.52</aws-java-sdk-v2.version>
<aws-java-sdk-v2.version>2.35.4</aws-java-sdk-v2.version>
<amazon-s3-encryption-client-java.version>3.1.1</amazon-s3-encryption-client-java.version>
<amazon-s3-analyticsaccelerator-s3.version>1.3.0</amazon-s3-analyticsaccelerator-s3.version>
<aws.eventstream.version>1.0.1</aws.eventstream.version>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,4 +54,8 @@ public String getMessage() {
public boolean retryable() {
return getCause().retryable();
}

/**
 * Get the operation string held by this exception.
 * @return the value of the {@code operation} field.
 */
public String getOperation() {
  return this.operation;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,12 @@
* Status code 443, no response from server. This is considered idempotent.
*/
public class AWSNoResponseException extends AWSServiceIOException {

/**
* Constructor.
* @param operation operation in progress.
* @param cause inner cause
*/
public AWSNoResponseException(String operation,
AwsServiceException cause) {
super(operation, cause);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.Options;
import org.apache.hadoop.fs.s3a.impl.ChecksumSupport;
import org.apache.hadoop.fs.s3a.impl.streams.StreamIntegration;
import org.apache.hadoop.security.ssl.DelegatingSSLSocketFactory;

Expand Down Expand Up @@ -1836,15 +1837,53 @@ private Constants() {
*/
public static final boolean CHECKSUM_VALIDATION_DEFAULT = false;

/**
* Should checksums always be generated?
* Not all third-party stores like this being enabled for every request.
* Value: {@value}.
*/
public static final String CHECKSUM_GENERATION =
"fs.s3a.checksum.generation";

/**
* Default value of {@link #CHECKSUM_GENERATION}.
* Value: {@value}.
*/
public static final boolean DEFAULT_CHECKSUM_GENERATION = false;

/**
* Indicates the algorithm used to create the checksum for the object
* to be uploaded to S3. Unset by default. It supports the following values:
* 'CRC32', 'CRC32C', 'SHA1', and 'SHA256'
* 'CRC32', 'CRC32C', 'SHA1', 'SHA256', 'CRC64_NVME', 'NONE', ''.
* When checksum calculation is enabled this MUST be set to a valid algorithm.
* value:{@value}
*/
public static final String CHECKSUM_ALGORITHM =
"fs.s3a.create.checksum.algorithm";

/**
* Default checksum algorithm: {@code "NONE"}.
*/
public static final String DEFAULT_CHECKSUM_ALGORITHM =
ChecksumSupport.NONE;

/**
* Send a {@code Content-MD5} header with every request.
* This is required when performing some operations with third party stores
* (for example: bulk delete).
* It is supported by AWS S3, though has unexpected behavior with AWS S3 Express storage.
* See https://github.com/aws/aws-sdk-java-v2/issues/6459 for details.
*/
public static final String REQUEST_MD5_HEADER =
"fs.s3a.request.md5.header";

/**
* Default value of {@link #REQUEST_MD5_HEADER}.
* Value: {@value}.
*/
public static final boolean DEFAULT_REQUEST_MD5_HEADER = true;


/**
* Are extensions classes, such as {@code fs.s3a.aws.credentials.provider},
* going to be loaded from the same classloader that loaded
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
import org.slf4j.LoggerFactory;

import software.amazon.awssdk.awscore.util.AwsHostNameUtils;
import software.amazon.awssdk.core.checksums.RequestChecksumCalculation;
import software.amazon.awssdk.core.checksums.ResponseChecksumValidation;
import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration;
import software.amazon.awssdk.core.client.config.SdkAdvancedClientOption;
import software.amazon.awssdk.core.interceptor.ExecutionInterceptor;
Expand All @@ -41,6 +43,7 @@
import software.amazon.awssdk.metrics.LoggingMetricPublisher;
import software.amazon.awssdk.regions.Region;
import software.amazon.awssdk.s3accessgrants.plugin.S3AccessGrantsPlugin;
import software.amazon.awssdk.services.s3.LegacyMd5Plugin;
import software.amazon.awssdk.services.s3.S3AsyncClient;
import software.amazon.awssdk.services.s3.S3AsyncClientBuilder;
import software.amazon.awssdk.services.s3.S3BaseClientBuilder;
Expand Down Expand Up @@ -202,11 +205,34 @@ private <BuilderT extends S3BaseClientBuilder<BuilderT, ClientT>, ClientT> Build

configureEndpointAndRegion(builder, parameters, conf);

// add a plugin to add a Content-MD5 header.
// this is required when performing some operations with third party stores
// (for example: bulk delete), and is somewhat harmless when working with AWS S3.
if (parameters.isMd5HeaderEnabled()) {
LOG.debug("MD5 header enabled");
builder.addPlugin(LegacyMd5Plugin.create());
}

//when to calculate request checksums.
final RequestChecksumCalculation checksumCalculation =
parameters.isChecksumCalculationEnabled()
? RequestChecksumCalculation.WHEN_SUPPORTED
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was thinking if we could have some docs around the WHEN_SUPPORTED and WHEN_REQUIRED

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Its confusing. What happens if it is required but not supported.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

some operations require checksums (bulk delete?) and everything which implemented them has had to expect checksums. This new generation option, "when supported" is what broke things as it really means "generate checksums on all requests". There are only two values in the enum, so the sdk always has to choose one.

when_supported

  • doesn't work for most third party stores
  • seems to break MPUs if you don't set a content checksum for put/posted data.

I think having a generation "true/false" is simpler for people to understand than the nuances of when_supported vs when_required.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes it should be just true/false. @ahmarsuhail could you please talk to the SDK team for this. Why they did this way?

: RequestChecksumCalculation.WHEN_REQUIRED;
LOG.debug("Using checksum calculation policy: {}", checksumCalculation);
builder.requestChecksumCalculation(checksumCalculation);

// response checksum validation. Slow, even with CRC32 checksums.
final ResponseChecksumValidation checksumValidation;
checksumValidation = parameters.isChecksumValidationEnabled()
? ResponseChecksumValidation.WHEN_SUPPORTED
: ResponseChecksumValidation.WHEN_REQUIRED;
LOG.debug("Using checksum validation policy: {}", checksumValidation);
builder.responseChecksumValidation(checksumValidation);

maybeApplyS3AccessGrantsConfigurations(builder, conf);

S3Configuration serviceConfiguration = S3Configuration.builder()
.pathStyleAccessEnabled(parameters.isPathStyleAccess())
.checksumValidationEnabled(parameters.isChecksumValidationEnabled())
.build();

final ClientOverrideConfiguration.Builder override =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1173,10 +1173,15 @@ private ClientManager createClientManager(URI fsURI, boolean dtEnabled) throws I
.withTransferManagerExecutor(unboundedThreadPool)
.withRegion(configuredRegion)
.withFipsEnabled(fipsEnabled)
.withS3ExpressStore(s3ExpressStore)
.withExpressCreateSession(
conf.getBoolean(S3EXPRESS_CREATE_SESSION, S3EXPRESS_CREATE_SESSION_DEFAULT))
.withChecksumValidationEnabled(
conf.getBoolean(CHECKSUM_VALIDATION, CHECKSUM_VALIDATION_DEFAULT))
.withChecksumCalculationEnabled(
conf.getBoolean(CHECKSUM_GENERATION, DEFAULT_CHECKSUM_GENERATION))
.withMd5HeaderEnabled(conf.getBoolean(REQUEST_MD5_HEADER,
DEFAULT_REQUEST_MD5_HEADER))
.withClientSideEncryptionEnabled(isCSEEnabled)
.withClientSideEncryptionMaterials(cseMaterials)
.withAnalyticsAcceleratorEnabled(isAnalyticsAcceleratorEnabled)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

package org.apache.hadoop.fs.s3a;

import software.amazon.awssdk.awscore.exception.AwsErrorDetails;
import software.amazon.awssdk.awscore.exception.AwsServiceException;
import software.amazon.awssdk.core.exception.AbortedException;
import software.amazon.awssdk.core.exception.ApiCallAttemptTimeoutException;
Expand Down Expand Up @@ -240,8 +241,13 @@ public static IOException translateException(@Nullable String operation,
? (S3Exception) ase
: null;
int status = ase.statusCode();
if (ase.awsErrorDetails() != null) {
message = message + ":" + ase.awsErrorDetails().errorCode();
// error details, may be null
final AwsErrorDetails errorDetails = ase.awsErrorDetails();
// error code: remains the empty string if errorDetails is null
String errorCode = "";
if (errorDetails != null) {
errorCode = errorDetails.errorCode();
message = message + ":" + errorCode;
}

// big switch on the HTTP status code.
Expand Down Expand Up @@ -308,6 +314,8 @@ public static IOException translateException(@Nullable String operation,
// precondition failure: the object is there, but the precondition
// (e.g. etag) didn't match. Assume remote file change during
// rename or status passed in to openfile had an etag which didn't match.
// See the SC_200 handler for the treatment of the S3 Express failure
// variant.
case SC_412_PRECONDITION_FAILED:
ioe = new RemoteFileChangedException(path, message, "", ase);
break;
Expand Down Expand Up @@ -352,6 +360,16 @@ public static IOException translateException(@Nullable String operation,
return ((MultiObjectDeleteException) exception)
.translateException(message);
}
if (PRECONDITION_FAILED.equals(errorCode)) {
// S3 Express stores report conflict in conditional writes
// as a 200 + an error code of "PreconditionFailed".
// This is mapped to RemoteFileChangedException for consistency
// with SC_412_PRECONDITION_FAILED handling.
return new RemoteFileChangedException(path,
operation,
exception.getMessage(),
exception);
}
// other 200: FALL THROUGH

default:
Expand Down
Loading