diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
index 56ea7d000690a..06f447ace2dc9 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
@@ -410,6 +410,12 @@ private Constants() {
public static final String CANNED_ACL = "fs.s3a.acl.default";
public static final String DEFAULT_CANNED_ACL = "";
+ /**
+ * Content encoding: gzip, deflate, compress, br, etc.
+ * Value {@value}.
+ */
+ public static final String CONTENT_ENCODING = "fs.s3a.object.content.encoding";
+
// should we try to purge old multipart uploads when starting up
public static final String PURGE_EXISTING_MULTIPART =
"fs.s3a.multipart.purge";
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
index bc3e7ea5a5640..c80670910f950 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
@@ -938,12 +938,16 @@ protected RequestFactory createRequestFactory() {
// request factory.
initCannedAcls(getConf());
+ // Any encoding type
+ String contentEncoding = getConf().getTrimmed(CONTENT_ENCODING, null);
+
return RequestFactoryImpl.builder()
.withBucket(requireNonNull(bucket))
.withCannedACL(getCannedACL())
.withEncryptionSecrets(requireNonNull(encryptionSecrets))
.withMultipartPartCountLimit(partCountLimit)
.withRequestPreparer(getAuditManager()::requestCreated)
+ .withContentEncoding(contentEncoding)
.build();
}
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java
index 9bffcc90d0bd4..ee5728688b74e 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java
@@ -100,6 +100,12 @@ public interface RequestFactory {
*/
S3AEncryptionMethods getServerSideEncryptionAlgorithm();
+ /**
+ * Get the content encoding (e.g. gzip) or return null if none.
+ * @return content encoding
+ */
+ String getContentEncoding();
+
/**
* Create a new object metadata instance.
* Any standard metadata headers are added here, for example:
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/HeaderProcessing.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/HeaderProcessing.java
index 17394b701e6f8..f75066e049d3e 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/HeaderProcessing.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/HeaderProcessing.java
@@ -83,7 +83,7 @@ public class HeaderProcessing extends AbstractStoreOperation {
XA_HEADER_PREFIX + Headers.CONTENT_DISPOSITION;
/**
- * Standard HTTP header found on some S3 objects: {@value}.
+ * Content encoding; can be configured: {@value}.
*/
public static final String XA_CONTENT_ENCODING =
XA_HEADER_PREFIX + Headers.CONTENT_ENCODING;
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java
index f9ff08a5f6542..db4a6090e328b 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java
@@ -118,6 +118,11 @@ public class RequestFactoryImpl implements RequestFactory {
*/
private final PrepareRequest requestPreparer;
+ /**
+ * Content encoding (null for none).
+ */
+ private final String contentEncoding;
+
/**
* Constructor.
* @param builder builder with all the configuration.
@@ -130,6 +135,7 @@ protected RequestFactoryImpl(
this.multipartPartCountLimit = builder.multipartPartCountLimit;
this.requesterPays = builder.requesterPays;
this.requestPreparer = builder.requestPreparer;
+ this.contentEncoding = builder.contentEncoding;
}
/**
@@ -193,6 +199,15 @@ public S3AEncryptionMethods getServerSideEncryptionAlgorithm() {
return encryptionSecrets.getEncryptionMethod();
}
+ /**
+ * Get the content encoding (e.g. gzip) or return null if none.
+ * @return content encoding
+ */
+ @Override
+ public String getContentEncoding() {
+ return contentEncoding;
+ }
+
/**
* Sets server side encryption parameters to the part upload
* request when encryption is enabled.
@@ -236,13 +251,18 @@ protected void setOptionalPutRequestParameters(PutObjectRequest request) {
/**
* Set the optional metadata for an object being created or copied.
* @param metadata to update.
+ * @param isDirectoryMarker is this for a directory marker?
*/
- protected void setOptionalObjectMetadata(ObjectMetadata metadata) {
+ protected void setOptionalObjectMetadata(ObjectMetadata metadata,
+ boolean isDirectoryMarker) {
final S3AEncryptionMethods algorithm
= getServerSideEncryptionAlgorithm();
if (S3AEncryptionMethods.SSE_S3 == algorithm) {
metadata.setSSEAlgorithm(algorithm.getMethod());
}
+ if (contentEncoding != null && !isDirectoryMarker) {
+ metadata.setContentEncoding(contentEncoding);
+ }
}
/**
@@ -255,8 +275,21 @@ protected void setOptionalObjectMetadata(ObjectMetadata metadata) {
*/
@Override
public ObjectMetadata newObjectMetadata(long length) {
+ return createObjectMetadata(length, false);
+ }
+
+ /**
+ * Create a new object metadata instance.
+ * Any standard metadata headers are added here, for example:
+ * encryption.
+ *
+ * @param length length of data to set in header; Ignored if negative
+ * @param isDirectoryMarker is this for a directory marker?
+ * @return a new metadata instance
+ */
+ private ObjectMetadata createObjectMetadata(long length, boolean isDirectoryMarker) {
final ObjectMetadata om = new ObjectMetadata();
- setOptionalObjectMetadata(om);
+ setOptionalObjectMetadata(om, isDirectoryMarker);
if (length >= 0) {
om.setContentLength(length);
}
@@ -271,7 +304,7 @@ public CopyObjectRequest newCopyObjectRequest(String srcKey,
new CopyObjectRequest(getBucket(), srcKey, getBucket(), dstKey);
ObjectMetadata dstom = newObjectMetadata(srcom.getContentLength());
HeaderProcessing.cloneObjectMetadata(srcom, dstom);
- setOptionalObjectMetadata(dstom);
+ setOptionalObjectMetadata(dstom, false);
copyEncryptionParameters(srcom, copyObjectRequest);
copyObjectRequest.setCannedAccessControlList(cannedACL);
copyObjectRequest.setNewObjectMetadata(dstom);
@@ -371,7 +404,7 @@ public int read() throws IOException {
}
};
// preparation happens in here
- final ObjectMetadata md = newObjectMetadata(0L);
+ final ObjectMetadata md = createObjectMetadata(0L, true);
md.setContentType(HeaderProcessing.CONTENT_TYPE_X_DIRECTORY);
PutObjectRequest putObjectRequest =
newPutObjectRequest(key, md, im);
@@ -586,6 +619,9 @@ public static final class RequestFactoryBuilder {
/** Requester Pays flag. */
private boolean requesterPays = false;
+ /** Content Encoding. */
+ private String contentEncoding;
+
/**
* Multipart limit.
*/
@@ -607,6 +643,16 @@ public RequestFactory build() {
return new RequestFactoryImpl(this);
}
+ /**
+ * Content encoding.
+ * @param value new value
+ * @return the builder
+ */
+ public RequestFactoryBuilder withContentEncoding(final String value) {
+ contentEncoding = value;
+ return this;
+ }
+
/**
* Target bucket.
* @param value new value
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
index f4f7144f34f16..63ff8504cbe56 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
@@ -1080,6 +1080,17 @@ options are covered in [Testing](./testing.md).
client has permission to read the bucket.
+
+
+ fs.s3a.object.content.encoding
+
+
+ Content encoding: gzip, deflate, compress, br, etc.
+ This will be set in the "Content-Encoding" header of the object,
+ and returned in HTTP HEAD/GET requests.
+
+
+
```
## Retry and Recovery
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AContentEncoding.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AContentEncoding.java
new file mode 100644
index 0000000000000..4a96bf5da91d7
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AContentEncoding.java
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a;
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.assertj.core.api.Assertions;
+import org.junit.Test;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.contract.ContractTestUtils;
+
+import static org.apache.hadoop.fs.s3a.Constants.CONTENT_ENCODING;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides;
+import static org.apache.hadoop.fs.s3a.impl.HeaderProcessing.XA_CONTENT_ENCODING;
+import static org.apache.hadoop.fs.s3a.impl.HeaderProcessing.decodeBytes;
+
+/**
+ * Tests of content encoding object meta data.
+ */
+public class ITestS3AContentEncoding extends AbstractS3ATestBase {
+
+ private static final String GZIP = "gzip";
+
+ @Override
+ protected Configuration createConfiguration() {
+ Configuration conf = super.createConfiguration();
+ removeBaseAndBucketOverrides(conf, CONTENT_ENCODING);
+ conf.set(CONTENT_ENCODING, GZIP);
+
+ return conf;
+ }
+
+ @Test
+ public void testCreatedObjectsHaveEncoding() throws Throwable {
+ S3AFileSystem fs = getFileSystem();
+ Path dir = methodPath();
+ fs.mkdirs(dir);
+ // even with content encoding enabled, directories do not have
+ // encoding.
+ Assertions.assertThat(getEncoding(dir))
+ .describedAs("Encoding of object %s", dir)
+ .isNull();
+ Path path = new Path(dir, "1");
+ ContractTestUtils.touch(fs, path);
+ assertObjectHasEncoding(path);
+ Path path2 = new Path(dir, "2");
+ fs.rename(path, path2);
+ assertObjectHasEncoding(path2);
+ }
+
+ /**
+ * Assert that a given object has gzip encoding specified.
+ * @param path path
+ *
+ */
+ private void assertObjectHasEncoding(Path path) throws Throwable {
+ Assertions.assertThat(getEncoding(path))
+ .describedAs("Encoding of object %s", path)
+ .isEqualTo(GZIP);
+ }
+
+ /**
+ * Get the encoding of a path.
+ * @param path path
+ * @return encoding string or null
+ * @throws IOException IO Failure.
+ */
+ private String getEncoding(Path path) throws IOException {
+ S3AFileSystem fs = getFileSystem();
+
+ Map xAttrs = fs.getXAttrs(path);
+ return decodeBytes(xAttrs.get(XA_CONTENT_ENCODING));
+ }
+}