From 83e396acd54409dac959188505bc211e80bccb7c Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Tue, 17 Aug 2021 11:50:19 -0700 Subject: [PATCH 01/10] Add the ability for users to specify the content encoding of the objects they are putting. This is useful for people loading the data into other tools in the AWS ecosystem which don't use file extensions to infer compression type (e.g. serving compressed files from S3 or importing into RDS) --- .../org/apache/hadoop/fs/s3a/Constants.java | 4 +++ .../apache/hadoop/fs/s3a/S3AFileSystem.java | 4 +++ .../hadoop/fs/s3a/api/RequestFactory.java | 6 ++++ .../fs/s3a/impl/RequestFactoryImpl.java | 30 +++++++++++++++++++ 4 files changed, 44 insertions(+) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index ea3de1da4b443..a9c743c9b93a7 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -410,6 +410,10 @@ private Constants() { public static final String CANNED_ACL = "fs.s3a.acl.default"; public static final String DEFAULT_CANNED_ACL = ""; + // gzip, deflate, compress, br, etc. + public static final String CONTENT_ENCODING = "fs.s3a.contentEncoding"; + public static final String DEFAULT_CONTENT_ENCODING = null; + // should we try to purge old multipart uploads when starting up public static final String PURGE_EXISTING_MULTIPART = "fs.s3a.multipart.purge"; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index d35074f7249f0..b15284e4c18fe 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -925,12 +925,16 @@ protected RequestFactory createRequestFactory() { // request factory. initCannedAcls(getConf()); + // Any encoding type + String contentEncoding = getConf().get(CONTENT_ENCODING, DEFAULT_CONTENT_ENCODING); + return RequestFactoryImpl.builder() .withBucket(requireNonNull(bucket)) .withCannedACL(getCannedACL()) .withEncryptionSecrets(requireNonNull(encryptionSecrets)) .withMultipartPartCountLimit(partCountLimit) .withRequestPreparer(getAuditManager()::requestCreated) + .withContentEncoding(contentEncoding) .build(); } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java index 9bffcc90d0bd4..1f076a7d2254f 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java @@ -100,6 +100,12 @@ public interface RequestFactory { */ S3AEncryptionMethods getServerSideEncryptionAlgorithm(); + /** + * Get the content encoding (e.g. gzip) or return null if none. + * @return content encoding + */ + public String getContentEncoding(); + /** * Create a new object metadata instance. * Any standard metadata headers are added here, for example: diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java index f9ff08a5f6542..67196bbbf31d3 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java @@ -118,6 +118,11 @@ public class RequestFactoryImpl implements RequestFactory { */ private final PrepareRequest requestPreparer; + /** + * Content encoding (null for none). + */ + private final String contentEncoding; + /** * Constructor. * @param builder builder with all the configuration. @@ -130,6 +135,7 @@ protected RequestFactoryImpl( this.multipartPartCountLimit = builder.multipartPartCountLimit; this.requesterPays = builder.requesterPays; this.requestPreparer = builder.requestPreparer; + this.contentEncoding = builder.contentEncoding; } /** @@ -193,6 +199,15 @@ public S3AEncryptionMethods getServerSideEncryptionAlgorithm() { return encryptionSecrets.getEncryptionMethod(); } + /** + * Get the content encoding (e.g. gzip) or return null if none. + * @return content encoding + */ + @Override + public String getContentEncoding() { + return contentEncoding; + } + /** * Sets server side encryption parameters to the part upload * request when encryption is enabled. @@ -238,6 +253,8 @@ protected void setOptionalPutRequestParameters(PutObjectRequest request) { * @param metadata to update. */ protected void setOptionalObjectMetadata(ObjectMetadata metadata) { + final String contentEncoding = + getContentEncoding(); final S3AEncryptionMethods algorithm = getServerSideEncryptionAlgorithm(); if (S3AEncryptionMethods.SSE_S3 == algorithm) { @@ -586,6 +603,9 @@ public static final class RequestFactoryBuilder { /** Requester Pays flag. */ private boolean requesterPays = false; + /** Content Encoding. */ + private String contentEncoding = null; + /** * Multipart limit. */ @@ -607,6 +627,16 @@ public RequestFactory build() { return new RequestFactoryImpl(this); } + /** + * Content encoding. + * @param value new value + * @return the builder + */ + public RequestFactoryBuilder withContentEncoding(final String value) { + contentEncoding = value; + return this; + } + /** * Target bucket. * @param value new value From 4472112ce8dfe4aed2229e718bd8d526f427fd7c Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Tue, 17 Aug 2021 14:39:33 -0700 Subject: [PATCH 02/10] Add an integration test --- .../fs/s3a/ITestS3AContentEncoding.java | 82 +++++++++++++++++++ 1 file changed, 82 insertions(+) create mode 100644 hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AContentEncoding.java diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AContentEncoding.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AContentEncoding.java new file mode 100644 index 0000000000000..b352475fcb638 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AContentEncoding.java @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a; + +import java.util.List; + +import com.amazonaws.services.s3.AmazonS3; +import com.amazonaws.services.s3.model.ObjectMetadata; +import org.assertj.core.api.Assertions; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.contract.ContractTestUtils; +import org.apache.hadoop.fs.s3a.audit.S3AAuditConstants; +import org.apache.hadoop.fs.s3a.impl.StoreContext; + +import static org.apache.hadoop.fs.s3a.Constants.CONTENT_ENCODING; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides; + +/** + * Tests of content encoding object meta data. + */ +public class ITestS3AContentEncoding extends AbstractS3ATestBase { + + private static final Logger LOG = + LoggerFactory.getLogger(ITestS3ACannedACLs.class); + + @Override + protected Configuration createConfiguration() { + Configuration conf = super.createConfiguration(); + conf.set(CONTENT_ENCODING, "gzip"); + + return conf; + } + + @Test + public void testCreatedObjectsHaveEncoding() throws Throwable { + S3AFileSystem fs = getFileSystem(); + Path dir = methodPath(); + fs.mkdirs(dir); + Path path = new Path(dir, "1"); + ContractTestUtils.touch(fs, path); + assertObjectHasEncoding(path); + } + + /** + * Assert that a given object has gzip encoding specified + * @param path path + */ + private void assertObjectHasEncoding(Path path) { + S3AFileSystem fs = getFileSystem(); + + StoreContext storeContext = fs.createStoreContext(); + AmazonS3 s3 = fs.getAmazonS3ClientForTesting("encoding"); + String key = storeContext.pathToKey(path); + ObjectMetadata meta = s3.getObjectMetadata(storeContext.getBucket(), + key); + String encoding = meta.getContentEncoding(); + Assertions.assertThat(encoding) + .describedAs("Encoding of object %s is gzip", path) + .equals("gzip!"); + } +} From aa18197c5f4ed7636d811b410a9a8c7d50a1dec2 Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Wed, 18 Aug 2021 12:50:24 -0700 Subject: [PATCH 03/10] Update the ITestS3a*Enc* skip logic to handle nulls, make my ITestS3A test fail. --- .../org/apache/hadoop/fs/s3a/ITestS3AContentEncoding.java | 2 +- .../fs/s3a/ITestS3AEncryptionSSEKMSUserDefinedKey.java | 5 +++-- .../hadoop/fs/s3a/scale/ITestS3AHugeFilesEncryption.java | 5 +++-- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AContentEncoding.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AContentEncoding.java index b352475fcb638..7ff55a1c17394 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AContentEncoding.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AContentEncoding.java @@ -77,6 +77,6 @@ private void assertObjectHasEncoding(Path path) { String encoding = meta.getContentEncoding(); Assertions.assertThat(encoding) .describedAs("Encoding of object %s is gzip", path) - .equals("gzip!"); + .isEqualTo("gzip!"); } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSEKMSUserDefinedKey.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSEKMSUserDefinedKey.java index 3a8cf7a11d666..fc69c602ab1e3 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSEKMSUserDefinedKey.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSEKMSUserDefinedKey.java @@ -39,8 +39,9 @@ protected Configuration createConfiguration() { // get the KMS key for this test. Configuration c = new Configuration(); String kmsKey = c.get(SERVER_SIDE_ENCRYPTION_KEY); - if (StringUtils.isBlank(kmsKey) || !c.get(SERVER_SIDE_ENCRYPTION_ALGORITHM) - .equals(S3AEncryptionMethods.CSE_KMS.name())) { + String encryptionAlgorithm = c.get(SERVER_SIDE_ENCRYPTION_ALGORITHM); + if (kmsKey == null || StringUtils.isBlank(kmsKey) || encryptionAlgorithm == null || + !encryptionAlgorithm.equals(S3AEncryptionMethods.CSE_KMS.name())) { skip(SERVER_SIDE_ENCRYPTION_KEY + " is not set for " + SSE_KMS.getMethod() + " or CSE-KMS algorithm is used instead of " + "SSE-KMS"); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AHugeFilesEncryption.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AHugeFilesEncryption.java index be51b2fa09cb1..8c1b07b338cd8 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AHugeFilesEncryption.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AHugeFilesEncryption.java @@ -45,8 +45,9 @@ public class ITestS3AHugeFilesEncryption extends AbstractSTestS3AHugeFiles { public void setup() throws Exception { Configuration c = new Configuration(); String kmsKey = c.get(SERVER_SIDE_ENCRYPTION_KEY); - if (StringUtils.isBlank(kmsKey) || !c.get(SERVER_SIDE_ENCRYPTION_ALGORITHM) - .equals(S3AEncryptionMethods.CSE_KMS.name())) { + String encryptionAlgorithm = c.get(SERVER_SIDE_ENCRYPTION_ALGORITHM); + if (kmsKey == null || StringUtils.isBlank(kmsKey) || encryptionAlgorithm == null || + !encryptionAlgorithm.equals(S3AEncryptionMethods.CSE_KMS.name())) { skip(SERVER_SIDE_ENCRYPTION_KEY + " is not set for " + SSE_KMS.getMethod() + " or CSE-KMS algorithm is used instead of " + "SSE-KMS"); From 59a593d921e862079a888f203dc2a3e71a1e2d15 Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Wed, 18 Aug 2021 13:05:07 -0700 Subject: [PATCH 04/10] Fix setting the content encoding and change the test to a real one --- .../java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java | 3 +++ .../java/org/apache/hadoop/fs/s3a/ITestS3AContentEncoding.java | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java index 67196bbbf31d3..aecaf2417c643 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java @@ -260,6 +260,9 @@ protected void setOptionalObjectMetadata(ObjectMetadata metadata) { if (S3AEncryptionMethods.SSE_S3 == algorithm) { metadata.setSSEAlgorithm(algorithm.getMethod()); } + if (contentEncoding != null) { + metadata.setContentEncoding(contentEncoding); + } } /** diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AContentEncoding.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AContentEncoding.java index 7ff55a1c17394..67a15df2a68f3 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AContentEncoding.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AContentEncoding.java @@ -77,6 +77,6 @@ private void assertObjectHasEncoding(Path path) { String encoding = meta.getContentEncoding(); Assertions.assertThat(encoding) .describedAs("Encoding of object %s is gzip", path) - .isEqualTo("gzip!"); + .isEqualTo("gzip"); } } From f924ca308d2190f6eb0dc5249138fa275a46ae3b Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Fri, 20 Aug 2021 10:51:59 -0700 Subject: [PATCH 05/10] Fix content encoding config var name to match --- .../src/main/java/org/apache/hadoop/fs/s3a/Constants.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index a9c743c9b93a7..d95d936f13191 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -411,7 +411,7 @@ private Constants() { public static final String DEFAULT_CANNED_ACL = ""; // gzip, deflate, compress, br, etc. - public static final String CONTENT_ENCODING = "fs.s3a.contentEncoding"; + public static final String CONTENT_ENCODING = "fs.s3a.content.encoding"; public static final String DEFAULT_CONTENT_ENCODING = null; // should we try to purge old multipart uploads when starting up From 25b6da0b27b2e94ee7970cf916a58b98405ab459 Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Fri, 20 Aug 2021 10:52:38 -0700 Subject: [PATCH 06/10] Fix some linter errors and add a rename op --- .../org/apache/hadoop/fs/s3a/api/RequestFactory.java | 2 +- .../apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java | 2 -- .../apache/hadoop/fs/s3a/ITestS3AContentEncoding.java | 9 ++++----- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java index 1f076a7d2254f..ee5728688b74e 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java @@ -104,7 +104,7 @@ public interface RequestFactory { * Get the content encoding (e.g. gzip) or return null if none. * @return content encoding */ - public String getContentEncoding(); + String getContentEncoding(); /** * Create a new object metadata instance. diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java index aecaf2417c643..b8266c543dc05 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java @@ -253,8 +253,6 @@ protected void setOptionalPutRequestParameters(PutObjectRequest request) { * @param metadata to update. */ protected void setOptionalObjectMetadata(ObjectMetadata metadata) { - final String contentEncoding = - getContentEncoding(); final S3AEncryptionMethods algorithm = getServerSideEncryptionAlgorithm(); if (S3AEncryptionMethods.SSE_S3 == algorithm) { diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AContentEncoding.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AContentEncoding.java index 67a15df2a68f3..3affd51aa6e0a 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AContentEncoding.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AContentEncoding.java @@ -18,8 +18,6 @@ package org.apache.hadoop.fs.s3a; -import java.util.List; - import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.model.ObjectMetadata; import org.assertj.core.api.Assertions; @@ -30,11 +28,9 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.contract.ContractTestUtils; -import org.apache.hadoop.fs.s3a.audit.S3AAuditConstants; import org.apache.hadoop.fs.s3a.impl.StoreContext; import static org.apache.hadoop.fs.s3a.Constants.CONTENT_ENCODING; -import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides; /** * Tests of content encoding object meta data. @@ -60,10 +56,13 @@ public void testCreatedObjectsHaveEncoding() throws Throwable { Path path = new Path(dir, "1"); ContractTestUtils.touch(fs, path); assertObjectHasEncoding(path); + Path path2 = new Path(dir, "2"); + fs.rename(path, path2); + assertObjectHasEncoding(path); } /** - * Assert that a given object has gzip encoding specified + * Assert that a given object has gzip encoding specified. * @param path path */ private void assertObjectHasEncoding(Path path) { From bb6cdfe4bd8c4f2d703312b7f4e0a6d0c35e53c4 Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Fri, 20 Aug 2021 10:56:50 -0700 Subject: [PATCH 07/10] Switch from using the full s3 client to just using getXattrs oops --- .../org/apache/hadoop/fs/s3a/ITestS3AContentEncoding.java | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AContentEncoding.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AContentEncoding.java index 3affd51aa6e0a..cb98f93955e3f 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AContentEncoding.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AContentEncoding.java @@ -28,6 +28,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.contract.ContractTestUtils; +import org.apache.hadoop.fs.s3a.impl.HeaderProcessing.XA_CONTENT_ENCODING; import org.apache.hadoop.fs.s3a.impl.StoreContext; import static org.apache.hadoop.fs.s3a.Constants.CONTENT_ENCODING; @@ -69,11 +70,8 @@ private void assertObjectHasEncoding(Path path) { S3AFileSystem fs = getFileSystem(); StoreContext storeContext = fs.createStoreContext(); - AmazonS3 s3 = fs.getAmazonS3ClientForTesting("encoding"); String key = storeContext.pathToKey(path); - ObjectMetadata meta = s3.getObjectMetadata(storeContext.getBucket(), - key); - String encoding = meta.getContentEncoding(); + String encoding = fs.getXAttrs(XA_CONTENT_ENCODING); Assertions.assertThat(encoding) .describedAs("Encoding of object %s is gzip", path) .isEqualTo("gzip"); From 939059b659772d16915c4ce0bde932f479f313b2 Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Fri, 10 Sep 2021 10:59:46 -0700 Subject: [PATCH 08/10] getXattr fix --- .../apache/hadoop/fs/s3a/ITestS3AContentEncoding.java | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AContentEncoding.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AContentEncoding.java index cb98f93955e3f..f5e27cc5bd19e 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AContentEncoding.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AContentEncoding.java @@ -18,6 +18,8 @@ package org.apache.hadoop.fs.s3a; +import java.util.Map; + import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.model.ObjectMetadata; import org.assertj.core.api.Assertions; @@ -28,7 +30,8 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.contract.ContractTestUtils; -import org.apache.hadoop.fs.s3a.impl.HeaderProcessing.XA_CONTENT_ENCODING; +import static org.apache.hadoop.fs.s3a.impl.HeaderProcessing.XA_CONTENT_ENCODING; +import static org.apache.hadoop.fs.s3a.impl.HeaderProcessing.decodeBytes; import org.apache.hadoop.fs.s3a.impl.StoreContext; import static org.apache.hadoop.fs.s3a.Constants.CONTENT_ENCODING; @@ -66,12 +69,12 @@ public void testCreatedObjectsHaveEncoding() throws Throwable { * Assert that a given object has gzip encoding specified. * @param path path */ - private void assertObjectHasEncoding(Path path) { + private void assertObjectHasEncoding(Path path) throws Throwable { S3AFileSystem fs = getFileSystem(); StoreContext storeContext = fs.createStoreContext(); - String key = storeContext.pathToKey(path); - String encoding = fs.getXAttrs(XA_CONTENT_ENCODING); + Map xAttrs = fs.getXAttrs(path); + String encoding = decodeBytes(xAttrs.get(XA_CONTENT_ENCODING)); Assertions.assertThat(encoding) .describedAs("Encoding of object %s is gzip", path) .isEqualTo("gzip"); From 0562940cd5115decfb835cf3b6506abdda9c631f Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Fri, 10 Sep 2021 13:17:44 -0700 Subject: [PATCH 09/10] Code review feedback --- .../src/main/java/org/apache/hadoop/fs/s3a/Constants.java | 1 - .../main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java | 2 +- .../org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java | 2 +- .../org/apache/hadoop/fs/s3a/ITestS3AContentEncoding.java | 7 ++----- .../fs/s3a/ITestS3AEncryptionSSEKMSUserDefinedKey.java | 5 ++--- 5 files changed, 6 insertions(+), 11 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index d95d936f13191..d17df133822e3 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -412,7 +412,6 @@ private Constants() { // gzip, deflate, compress, br, etc. public static final String CONTENT_ENCODING = "fs.s3a.content.encoding"; - public static final String DEFAULT_CONTENT_ENCODING = null; // should we try to purge old multipart uploads when starting up public static final String PURGE_EXISTING_MULTIPART = diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index b15284e4c18fe..c69e695e720fd 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -926,7 +926,7 @@ protected RequestFactory createRequestFactory() { initCannedAcls(getConf()); // Any encoding type - String contentEncoding = getConf().get(CONTENT_ENCODING, DEFAULT_CONTENT_ENCODING); + String contentEncoding = getConf().getTrimmed(CONTENT_ENCODING, null); return RequestFactoryImpl.builder() .withBucket(requireNonNull(bucket)) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java index b8266c543dc05..21de5a0a6d760 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java @@ -605,7 +605,7 @@ public static final class RequestFactoryBuilder { private boolean requesterPays = false; /** Content Encoding. */ - private String contentEncoding = null; + private String contentEncoding; /** * Multipart limit. diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AContentEncoding.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AContentEncoding.java index f5e27cc5bd19e..39fb70f466a68 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AContentEncoding.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AContentEncoding.java @@ -41,9 +41,6 @@ */ public class ITestS3AContentEncoding extends AbstractS3ATestBase { - private static final Logger LOG = - LoggerFactory.getLogger(ITestS3ACannedACLs.class); - @Override protected Configuration createConfiguration() { Configuration conf = super.createConfiguration(); @@ -62,7 +59,7 @@ public void testCreatedObjectsHaveEncoding() throws Throwable { assertObjectHasEncoding(path); Path path2 = new Path(dir, "2"); fs.rename(path, path2); - assertObjectHasEncoding(path); + assertObjectHasEncoding(path2); } /** @@ -76,7 +73,7 @@ private void assertObjectHasEncoding(Path path) throws Throwable { Map xAttrs = fs.getXAttrs(path); String encoding = decodeBytes(xAttrs.get(XA_CONTENT_ENCODING)); Assertions.assertThat(encoding) - .describedAs("Encoding of object %s is gzip", path) + .describedAs("Encoding of object %s should be gzip, is %s", path, encoding) .isEqualTo("gzip"); } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSEKMSUserDefinedKey.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSEKMSUserDefinedKey.java index fc69c602ab1e3..9f6f553dc1932 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSEKMSUserDefinedKey.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSEKMSUserDefinedKey.java @@ -39,9 +39,8 @@ protected Configuration createConfiguration() { // get the KMS key for this test. Configuration c = new Configuration(); String kmsKey = c.get(SERVER_SIDE_ENCRYPTION_KEY); - String encryptionAlgorithm = c.get(SERVER_SIDE_ENCRYPTION_ALGORITHM); - if (kmsKey == null || StringUtils.isBlank(kmsKey) || encryptionAlgorithm == null || - !encryptionAlgorithm.equals(S3AEncryptionMethods.CSE_KMS.name())) { + String encryptionAlgorithm = c.get(SERVER_SIDE_ENCRYPTION_ALGORITHM, ""); + if (StringUtils.isBlank(kmsKey) || !encryptionAlgorithm.equals(S3AEncryptionMethods.CSE_KMS.name())) { skip(SERVER_SIDE_ENCRYPTION_KEY + " is not set for " + SSE_KMS.getMethod() + " or CSE-KMS algorithm is used instead of " + "SSE-KMS"); From 3d45dc78b3ff309289603a7bde6aae91b73e450a Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Fri, 10 Sep 2021 13:20:11 -0700 Subject: [PATCH 10/10] Unused import cleanup in test --- .../java/org/apache/hadoop/fs/s3a/ITestS3AContentEncoding.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AContentEncoding.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AContentEncoding.java index 39fb70f466a68..7b3b382d40a1c 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AContentEncoding.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AContentEncoding.java @@ -20,8 +20,6 @@ import java.util.Map; -import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.services.s3.model.ObjectMetadata; import org.assertj.core.api.Assertions; import org.junit.Test; import org.slf4j.Logger;