diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
index ea3de1da4b443..d17df133822e3 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
@@ -410,6 +410,9 @@ private Constants() {
   public static final String CANNED_ACL = "fs.s3a.acl.default";
   public static final String DEFAULT_CANNED_ACL = "";
 
+  // content encoding to set on new objects: gzip, deflate, compress, br, etc.
+  public static final String CONTENT_ENCODING = "fs.s3a.content.encoding";
+
   // should we try to purge old multipart uploads when starting up
   public static final String PURGE_EXISTING_MULTIPART = "fs.s3a.multipart.purge";
 
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
index d35074f7249f0..c69e695e720fd 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
@@ -925,12 +925,16 @@ protected RequestFactory createRequestFactory() {
     // request factory.
     initCannedAcls(getConf());
 
+    // content encoding to apply to new objects, or null for none
+    String contentEncoding = getConf().getTrimmed(CONTENT_ENCODING, null);
+
     return RequestFactoryImpl.builder()
         .withBucket(requireNonNull(bucket))
         .withCannedACL(getCannedACL())
         .withEncryptionSecrets(requireNonNull(encryptionSecrets))
         .withMultipartPartCountLimit(partCountLimit)
         .withRequestPreparer(getAuditManager()::requestCreated)
+        .withContentEncoding(contentEncoding)
         .build();
   }
 
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java
index 9bffcc90d0bd4..ee5728688b74e 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java
@@ -100,6 +100,12 @@ public interface RequestFactory {
    */
   S3AEncryptionMethods getServerSideEncryptionAlgorithm();
 
+  /**
+   * Get the content encoding (e.g. gzip) or return null if none.
+   * @return content encoding
+   */
+  String getContentEncoding();
+
   /**
    * Create a new object metadata instance.
    * Any standard metadata headers are added here, for example:
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java
index f9ff08a5f6542..21de5a0a6d760 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java
@@ -118,6 +118,11 @@ public class RequestFactoryImpl implements RequestFactory {
    */
   private final PrepareRequest requestPreparer;
 
+  /**
+   * Content encoding (null for none).
+   */
+  private final String contentEncoding;
+
   /**
    * Constructor.
    * @param builder builder with all the configuration.
@@ -130,6 +135,7 @@ protected RequestFactoryImpl(
     this.multipartPartCountLimit = builder.multipartPartCountLimit;
     this.requesterPays = builder.requesterPays;
     this.requestPreparer = builder.requestPreparer;
+    this.contentEncoding = builder.contentEncoding;
   }
 
   /**
@@ -193,6 +199,15 @@ public S3AEncryptionMethods getServerSideEncryptionAlgorithm() {
     return encryptionSecrets.getEncryptionMethod();
   }
 
+  /**
+   * Get the content encoding (e.g. gzip) or return null if none.
+   * @return content encoding
+   */
+  @Override
+  public String getContentEncoding() {
+    return contentEncoding;
+  }
+
   /**
    * Sets server side encryption parameters to the part upload
    * request when encryption is enabled.
@@ -243,6 +258,9 @@ protected void setOptionalObjectMetadata(ObjectMetadata metadata) {
     if (S3AEncryptionMethods.SSE_S3 == algorithm) {
       metadata.setSSEAlgorithm(algorithm.getMethod());
     }
+    if (contentEncoding != null) {
+      metadata.setContentEncoding(contentEncoding);
+    }
   }
 
   /**
@@ -586,6 +604,9 @@ public static final class RequestFactoryBuilder {
     /** Requester Pays flag. */
     private boolean requesterPays = false;
 
+    /** Content Encoding. */
+    private String contentEncoding;
+
     /**
      * Multipart limit.
      */
@@ -607,6 +628,16 @@ public RequestFactory build() {
       return new RequestFactoryImpl(this);
     }
 
+    /**
+     * Content encoding.
+     * @param value new value
+     * @return the builder
+     */
+    public RequestFactoryBuilder withContentEncoding(final String value) {
+      contentEncoding = value;
+      return this;
+    }
+
     /**
      * Target bucket.
      * @param value new value
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AContentEncoding.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AContentEncoding.java
new file mode 100644
index 0000000000000..7b3b382d40a1c
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AContentEncoding.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a;
+
+import java.util.Map;
+
+import org.assertj.core.api.Assertions;
+import org.junit.Test;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.contract.ContractTestUtils;
+
+import static org.apache.hadoop.fs.s3a.Constants.CONTENT_ENCODING;
+import static org.apache.hadoop.fs.s3a.impl.HeaderProcessing.XA_CONTENT_ENCODING;
+import static org.apache.hadoop.fs.s3a.impl.HeaderProcessing.decodeBytes;
+
+/**
+ * Tests of content encoding object metadata.
+ */
+public class ITestS3AContentEncoding extends AbstractS3ATestBase {
+
+  @Override
+  protected Configuration createConfiguration() {
+    Configuration conf = super.createConfiguration();
+    conf.set(CONTENT_ENCODING, "gzip");
+
+    return conf;
+  }
+
+  @Test
+  public void testCreatedObjectsHaveEncoding() throws Throwable {
+    S3AFileSystem fs = getFileSystem();
+    Path dir = methodPath();
+    fs.mkdirs(dir);
+    Path path = new Path(dir, "1");
+    ContractTestUtils.touch(fs, path);
+    assertObjectHasEncoding(path);
+    Path path2 = new Path(dir, "2");
+    fs.rename(path, path2);
+    assertObjectHasEncoding(path2);
+  }
+
+  /**
+   * Assert that a given object has gzip encoding specified.
+   * @param path path
+   */
+  private void assertObjectHasEncoding(Path path) throws Throwable {
+    S3AFileSystem fs = getFileSystem();
+
+    Map<String, byte[]> xAttrs = fs.getXAttrs(path);
+    String encoding = decodeBytes(xAttrs.get(XA_CONTENT_ENCODING));
+    Assertions.assertThat(encoding)
+        .describedAs("Encoding of object %s should be gzip, is %s", path, encoding)
+        .isEqualTo("gzip");
+  }
+}
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSEKMSUserDefinedKey.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSEKMSUserDefinedKey.java
index 3a8cf7a11d666..9f6f553dc1932 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSEKMSUserDefinedKey.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSEKMSUserDefinedKey.java
@@ -39,8 +39,9 @@ protected Configuration createConfiguration() {
     // get the KMS key for this test.
     Configuration c = new Configuration();
     String kmsKey = c.get(SERVER_SIDE_ENCRYPTION_KEY);
-    if (StringUtils.isBlank(kmsKey) || !c.get(SERVER_SIDE_ENCRYPTION_ALGORITHM)
-        .equals(S3AEncryptionMethods.CSE_KMS.name())) {
+    String encryptionAlgorithm = c.get(SERVER_SIDE_ENCRYPTION_ALGORITHM, "");
+    if (StringUtils.isBlank(kmsKey)
+        || !encryptionAlgorithm.equals(S3AEncryptionMethods.CSE_KMS.name())) {
       skip(SERVER_SIDE_ENCRYPTION_KEY + " is not set for " +
           SSE_KMS.getMethod() + " or CSE-KMS algorithm is used instead of " +
           "SSE-KMS");
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AHugeFilesEncryption.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AHugeFilesEncryption.java
index be51b2fa09cb1..8c1b07b338cd8 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AHugeFilesEncryption.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AHugeFilesEncryption.java
@@ -45,8 +45,9 @@ public class ITestS3AHugeFilesEncryption extends AbstractSTestS3AHugeFiles {
   public void setup() throws Exception {
     Configuration c = new Configuration();
     String kmsKey = c.get(SERVER_SIDE_ENCRYPTION_KEY);
-    if (StringUtils.isBlank(kmsKey) || !c.get(SERVER_SIDE_ENCRYPTION_ALGORITHM)
-        .equals(S3AEncryptionMethods.CSE_KMS.name())) {
+    String encryptionAlgorithm = c.get(SERVER_SIDE_ENCRYPTION_ALGORITHM, "");
+    if (StringUtils.isBlank(kmsKey)
+        || !encryptionAlgorithm.equals(S3AEncryptionMethods.CSE_KMS.name())) {
       skip(SERVER_SIDE_ENCRYPTION_KEY + " is not set for " +
           SSE_KMS.getMethod() + " or CSE-KMS algorithm is used instead of " +
           "SSE-KMS");
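Usage sketch (not part of the patch): the example below shows how the new fs.s3a.content.encoding option is expected to be used end to end, reading the header back through the same XAttr view the integration test above relies on. The bucket/key names, the demo class, and the gzippedBytes() helper are illustrative only; note that S3A only sets the Content-Encoding header on the PUT, it never compresses data itself, so the payload must already be gzip-compressed.

    // Illustrative sketch: create an object with Content-Encoding: gzip set via
    // fs.s3a.content.encoding, then read the header back as an XAttr.
    import java.io.OutputStream;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    import static org.apache.hadoop.fs.s3a.Constants.CONTENT_ENCODING;
    import static org.apache.hadoop.fs.s3a.impl.HeaderProcessing.XA_CONTENT_ENCODING;
    import static org.apache.hadoop.fs.s3a.impl.HeaderProcessing.decodeBytes;

    public class ContentEncodingDemo {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // every object created through this filesystem instance carries the header
        conf.set(CONTENT_ENCODING, "gzip");

        // bucket and key are illustrative only
        Path path = new Path("s3a://example-bucket/logs/sample.json.gz");
        try (FileSystem fs = FileSystem.newInstance(path.toUri(), conf)) {
          try (OutputStream out = fs.create(path)) {
            out.write(gzippedBytes());  // payload is assumed pre-compressed
          }
          // the header surfaces through the XAttr API as "header.Content-Encoding"
          String encoding = decodeBytes(fs.getXAttr(path, XA_CONTENT_ENCODING));
          System.out.println("Content-Encoding = " + encoding);
        }
      }

      /** Hypothetical helper: gzip-compress a small payload in memory. */
      private static byte[] gzippedBytes() throws Exception {
        java.io.ByteArrayOutputStream bos = new java.io.ByteArrayOutputStream();
        try (java.util.zip.GZIPOutputStream gz =
                 new java.util.zip.GZIPOutputStream(bos)) {
          gz.write("sample".getBytes(java.nio.charset.StandardCharsets.UTF_8));
        }
        return bos.toByteArray();
      }
    }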