From 066036b2d2990e00c9b535c267a8f563a74c24fa Mon Sep 17 00:00:00 2001 From: PradhanPrerak39 Date: Tue, 13 Feb 2024 18:47:14 -0800 Subject: [PATCH 1/7] [SPARK-38958]: Override S3 Client in Spark Write/Read calls --- .../org/apache/hadoop/fs/s3a/Constants.java | 17 +++++++ .../hadoop/fs/s3a/impl/AWSClientConfig.java | 48 +++++++++++++++++++ 2 files changed, 65 insertions(+) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index 96dc2be6a260d..e7493af7670ad 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -775,6 +775,23 @@ private Constants() { "fs.s3a." + Constants.AWS_SERVICE_IDENTIFIER_STS.toLowerCase() + ".signing-algorithm"; + /** + * List of custom headers to be set on the service client. + * Multiple parameters can be used to specify custom headers. + * fs.s3a.s3.custom.headers - headers to add on all the s3 requests. + * fs.s3a.sts.custom.headers - headers to add on all the sts requests. + * Examples + * CustomHeader {@literal ->} 'Header1:Value1' + * CustomHeaders {@literal ->} 'Header1=Value1:Value2,Header2=Value1' + */ + public static final String CUSTOM_HEADERS_STS = + "fs.s3a." + Constants.AWS_SERVICE_IDENTIFIER_STS.toLowerCase() + + ".custom.headers"; + + public static final String CUSTOM_HEADERS_S3 = + "fs.s3a." + Constants.AWS_SERVICE_IDENTIFIER_S3.toLowerCase() + + ".custom.headers"; + public static final String S3N_FOLDER_SUFFIX = "_$folder$"; public static final String FS_S3A_BLOCK_SIZE = "fs.s3a.block.size"; public static final String FS_S3A = "s3a"; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java index 60729ac30866a..990d3cbb3f4c4 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java @@ -22,6 +22,8 @@ import java.net.URI; import java.net.URISyntaxException; import java.time.Duration; +import java.util.Arrays; +import java.util.List; import java.util.concurrent.TimeUnit; import org.slf4j.Logger; @@ -72,6 +74,8 @@ import static org.apache.hadoop.fs.s3a.Constants.SIGNING_ALGORITHM_STS; import static org.apache.hadoop.fs.s3a.Constants.SOCKET_TIMEOUT; import static org.apache.hadoop.fs.s3a.Constants.USER_AGENT_PREFIX; +import static org.apache.hadoop.fs.s3a.Constants.CUSTOM_HEADERS_S3; +import static org.apache.hadoop.fs.s3a.Constants.CUSTOM_HEADERS_STS; import static org.apache.hadoop.fs.s3a.impl.ConfigurationHelper.enforceMinimumDuration; import static org.apache.hadoop.fs.s3a.impl.ConfigurationHelper.getDuration; import static org.apache.hadoop.util.Preconditions.checkArgument; @@ -116,6 +120,8 @@ public static ClientOverrideConfiguration.Builder createClientConfigBuilder(Conf initUserAgent(conf, overrideConfigBuilder); + initRequestHeaders(conf, overrideConfigBuilder, awsServiceIdentifier); + String signer = conf.getTrimmed(SIGNING_ALGORITHM, ""); if (!signer.isEmpty()) { LOG.debug("Signer override = {}", signer); @@ -407,6 +413,48 @@ private static void initSigner(Configuration conf, } } + /** + * + * @param conf hadoop configuration + * @param clientConfig client configuration to update + * @param awsServiceIdentifier service name + */ + private static void initRequestHeaders(Configuration conf, + ClientOverrideConfiguration.Builder clientConfig, String awsServiceIdentifier) { + String configKey = null; + switch (awsServiceIdentifier) { + case AWS_SERVICE_IDENTIFIER_S3: + configKey = CUSTOM_HEADERS_S3; + break; + case AWS_SERVICE_IDENTIFIER_STS: + configKey = CUSTOM_HEADERS_STS; + break; + default: + // Nothing to do. The original signer override is already setup + } + if (configKey != null) { + String[] customHeaders = conf.getTrimmedStrings(configKey); + if (customHeaders == null || customHeaders.length == 0) { + LOG.debug("No custom headers specified"); + return; + } + + for (String customHeader : customHeaders) { + String[] parts = customHeader.split("="); + if (parts.length != 2) { + String message = "Invalid format (Expected header1=value1:value2,header2=value1) for Header: [" + + customHeader + + "]"; + LOG.error(message); + throw new IllegalArgumentException(message); + } + + List values = Arrays.asList(parts[1].split(":")); + clientConfig.putHeader(parts[0], values); + } + } + } + /** * Configures request timeout in the client configuration. * This is independent of the timeouts set in the sync and async HTTP clients; From 917e21417af5979b42a9887862efa56db493c16d Mon Sep 17 00:00:00 2001 From: PradhanPrerak Date: Mon, 17 Jun 2024 13:38:48 -0700 Subject: [PATCH 2/7] address comments --- .../org/apache/hadoop/fs/s3a/Constants.java | 8 +-- .../hadoop/fs/s3a/impl/AWSClientConfig.java | 26 +++----- .../fs/s3a/impl/TestAwsClientConfig.java | 59 +++++++++++++++++++ 3 files changed, 70 insertions(+), 23 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index 8ce13269a18d4..58c318ce989a2 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -778,18 +778,18 @@ private Constants() { /** * List of custom headers to be set on the service client. * Multiple parameters can be used to specify custom headers. - * fs.s3a.s3.custom.headers - headers to add on all the s3 requests. - * fs.s3a.sts.custom.headers - headers to add on all the sts requests. + * fs.s3a.client.s3.custom.headers - headers to add on all the s3 requests. + * fs.s3a.client.sts.custom.headers - headers to add on all the sts requests. * Examples * CustomHeader {@literal ->} 'Header1:Value1' * CustomHeaders {@literal ->} 'Header1=Value1:Value2,Header2=Value1' */ public static final String CUSTOM_HEADERS_STS = - "fs.s3a." + Constants.AWS_SERVICE_IDENTIFIER_STS.toLowerCase() + "fs.s3a.client." + Constants.AWS_SERVICE_IDENTIFIER_STS.toLowerCase() + ".custom.headers"; public static final String CUSTOM_HEADERS_S3 = - "fs.s3a." + Constants.AWS_SERVICE_IDENTIFIER_S3.toLowerCase() + "fs.s3a.client." + Constants.AWS_SERVICE_IDENTIFIER_S3.toLowerCase() + ".custom.headers"; public static final String S3N_FOLDER_SUFFIX = "_$folder$"; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java index 990d3cbb3f4c4..0eb6fd8605244 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java @@ -24,6 +24,7 @@ import java.time.Duration; import java.util.Arrays; import java.util.List; +import java.util.Map; import java.util.concurrent.TimeUnit; import org.slf4j.Logger; @@ -433,25 +434,12 @@ private static void initRequestHeaders(Configuration conf, // Nothing to do. The original signer override is already setup } if (configKey != null) { - String[] customHeaders = conf.getTrimmedStrings(configKey); - if (customHeaders == null || customHeaders.length == 0) { - LOG.debug("No custom headers specified"); - return; - } - - for (String customHeader : customHeaders) { - String[] parts = customHeader.split("="); - if (parts.length != 2) { - String message = "Invalid format (Expected header1=value1:value2,header2=value1) for Header: [" - + customHeader - + "]"; - LOG.error(message); - throw new IllegalArgumentException(message); - } - - List values = Arrays.asList(parts[1].split(":")); - clientConfig.putHeader(parts[0], values); - } + Map awsClientCustomHeadersMap = + S3AUtils.getTrimmedStringCollectionSplitByEquals(conf, configKey); + awsClientCustomHeadersMap.forEach((header, valueString) -> { + List headerValues = Arrays.asList(valueString.split(":")); + clientConfig.putHeader(header, headerValues); + }); } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestAwsClientConfig.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestAwsClientConfig.java index eacff90ea4c8a..c5925c49f1137 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestAwsClientConfig.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestAwsClientConfig.java @@ -18,9 +18,11 @@ package org.apache.hadoop.fs.s3a.impl; +import java.io.IOException; import java.time.Duration; import java.util.Arrays; +import org.apache.hadoop.util.Lists; import org.assertj.core.api.Assertions; import org.junit.After; import org.junit.Test; @@ -30,10 +32,14 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.test.AbstractHadoopTestBase; +import static org.apache.hadoop.fs.s3a.Constants.AWS_SERVICE_IDENTIFIER_S3; +import static org.apache.hadoop.fs.s3a.Constants.AWS_SERVICE_IDENTIFIER_STS; import static org.apache.hadoop.fs.s3a.Constants.CONNECTION_ACQUISITION_TIMEOUT; import static org.apache.hadoop.fs.s3a.Constants.CONNECTION_IDLE_TIME; import static org.apache.hadoop.fs.s3a.Constants.CONNECTION_KEEPALIVE; import static org.apache.hadoop.fs.s3a.Constants.CONNECTION_TTL; +import static org.apache.hadoop.fs.s3a.Constants.CUSTOM_HEADERS_S3; +import static org.apache.hadoop.fs.s3a.Constants.CUSTOM_HEADERS_STS; import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_CONNECTION_ACQUISITION_TIMEOUT_DURATION; import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_CONNECTION_IDLE_TIME_DURATION; import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_CONNECTION_KEEPALIVE; @@ -48,6 +54,7 @@ import static org.apache.hadoop.fs.s3a.Constants.REQUEST_TIMEOUT; import static org.apache.hadoop.fs.s3a.Constants.SOCKET_TIMEOUT; import static org.apache.hadoop.fs.s3a.impl.AWSClientConfig.createApiConnectionSettings; +import static org.apache.hadoop.fs.s3a.impl.AWSClientConfig.createClientConfigBuilder; import static org.apache.hadoop.fs.s3a.impl.AWSClientConfig.createConnectionSettings; import static org.apache.hadoop.fs.s3a.impl.ConfigurationHelper.enforceMinimumDuration; @@ -201,4 +208,56 @@ public void testCreateApiConnectionSettingsDefault() { private void setOptionsToValue(String value, Configuration conf, String... keys) { Arrays.stream(keys).forEach(key -> conf.set(key, value)); } + + /** + * if {@link org.apache.hadoop.fs.s3a.Constants#CUSTOM_HEADERS_STS} is set, + * verify that returned client configuration has desired headers set. + */ + @Test + public void testInitRequestHeadersForSTS() throws IOException { + final Configuration conf = new Configuration(); + conf.set(CUSTOM_HEADERS_STS, "foo=bar:baz,qux=quux"); + Assertions.assertThat(conf.get(CUSTOM_HEADERS_S3)) + .describedAs("Custom client headers for s3 %s", CUSTOM_HEADERS_S3) + .isNull(); + + Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_S3).headers().size()) + .describedAs("Count of S3 client headers") + .isEqualTo(0); + Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_STS).headers().size()) + .describedAs("Count of STS client headers") + .isEqualTo(2); + Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_STS).headers().get("foo")) + .describedAs("STS client 'foo' header value") + .isEqualTo(Lists.newArrayList("bar", "baz")); + Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_STS).headers().get("qux")) + .describedAs("STS client 'qux' header value") + .isEqualTo(Lists.newArrayList("quux")); + } + + /** + * if {@link org.apache.hadoop.fs.s3a.Constants#CUSTOM_HEADERS_S3} is set, + * verify that returned client configuration has desired headers set. + */ + @Test + public void testInitRequestHeadersForS3() throws IOException { + final Configuration conf = new Configuration(); + conf.set(CUSTOM_HEADERS_S3, "foo=bar:baz,qux=quux"); + Assertions.assertThat(conf.get(CUSTOM_HEADERS_STS)) + .describedAs("Custom client headers for STS %s", CUSTOM_HEADERS_STS) + .isNull(); + + Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_STS).headers().size()) + .describedAs("Count of STS client headers") + .isEqualTo(0); + Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_S3).headers().size()) + .describedAs("Count of S3 client headers") + .isEqualTo(2); + Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_S3).headers().get("foo")) + .describedAs("S3 client 'foo' header value") + .isEqualTo(Lists.newArrayList("bar", "baz")); + Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_S3).headers().get("qux")) + .describedAs("S3 client 'qux' header value") + .isEqualTo(Lists.newArrayList("quux")); + } } From 626d8b02b0753f22cd6f0c8741a2b1f1b25438a9 Mon Sep 17 00:00:00 2001 From: PradhanPrerak Date: Tue, 2 Jul 2024 12:20:19 -0700 Subject: [PATCH 3/7] address comments --- .../java/org/apache/hadoop/fs/s3a/Constants.java | 14 ++++++++++---- .../apache/hadoop/fs/s3a/impl/AWSClientConfig.java | 1 + .../hadoop/fs/s3a/impl/TestAwsClientConfig.java | 2 +- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index 58c318ce989a2..c1e9e3dec86f0 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -775,6 +775,12 @@ private Constants() { "fs.s3a." + Constants.AWS_SERVICE_IDENTIFIER_STS.toLowerCase() + ".signing-algorithm"; + /** Prefix for S3A client-specific properties. */ + public static final String FS_S3A_CLIENT_PREFIX = "fs.s3a.client."; + + /** Custom headers postfix */ + public static final String CUSTOM_HEADER_POSTFIX = ".custom.headers"; + /** * List of custom headers to be set on the service client. * Multiple parameters can be used to specify custom headers. @@ -785,12 +791,12 @@ private Constants() { * CustomHeaders {@literal ->} 'Header1=Value1:Value2,Header2=Value1' */ public static final String CUSTOM_HEADERS_STS = - "fs.s3a.client." + Constants.AWS_SERVICE_IDENTIFIER_STS.toLowerCase() - + ".custom.headers"; + FS_S3A_CLIENT_PREFIX + Constants.AWS_SERVICE_IDENTIFIER_STS.toLowerCase() + + CUSTOM_HEADER_POSTFIX; public static final String CUSTOM_HEADERS_S3 = - "fs.s3a.client." + Constants.AWS_SERVICE_IDENTIFIER_S3.toLowerCase() - + ".custom.headers"; + FS_S3A_CLIENT_PREFIX + Constants.AWS_SERVICE_IDENTIFIER_S3.toLowerCase() + + CUSTOM_HEADER_POSTFIX; public static final String S3N_FOLDER_SUFFIX = "_$folder$"; public static final String FS_S3A_BLOCK_SIZE = "fs.s3a.block.size"; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java index 0eb6fd8605244..bc29ca99dcf0c 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java @@ -440,6 +440,7 @@ private static void initRequestHeaders(Configuration conf, List headerValues = Arrays.asList(valueString.split(":")); clientConfig.putHeader(header, headerValues); }); + LOG.debug("headers for {} client = {}", awsServiceIdentifier, clientConfig.headers()); } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestAwsClientConfig.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestAwsClientConfig.java index c5925c49f1137..ed626b7806515 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestAwsClientConfig.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestAwsClientConfig.java @@ -22,7 +22,6 @@ import java.time.Duration; import java.util.Arrays; -import org.apache.hadoop.util.Lists; import org.assertj.core.api.Assertions; import org.junit.After; import org.junit.Test; @@ -31,6 +30,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.test.AbstractHadoopTestBase; +import org.apache.hadoop.util.Lists; import static org.apache.hadoop.fs.s3a.Constants.AWS_SERVICE_IDENTIFIER_S3; import static org.apache.hadoop.fs.s3a.Constants.AWS_SERVICE_IDENTIFIER_STS; From 47baf3460c4a9ba13bc4dabdba3e8b3b72d0393c Mon Sep 17 00:00:00 2001 From: PradhanPrerak Date: Tue, 2 Jul 2024 15:44:38 -0700 Subject: [PATCH 4/7] renames variable --- .../src/main/java/org/apache/hadoop/fs/s3a/Constants.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index c1e9e3dec86f0..6ebbac4eb6ae1 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -779,7 +779,7 @@ private Constants() { public static final String FS_S3A_CLIENT_PREFIX = "fs.s3a.client."; /** Custom headers postfix */ - public static final String CUSTOM_HEADER_POSTFIX = ".custom.headers"; + public static final String CUSTOM_HEADERS_POSTFIX = ".custom.headers"; /** * List of custom headers to be set on the service client. @@ -792,11 +792,11 @@ private Constants() { */ public static final String CUSTOM_HEADERS_STS = FS_S3A_CLIENT_PREFIX + Constants.AWS_SERVICE_IDENTIFIER_STS.toLowerCase() - + CUSTOM_HEADER_POSTFIX; + + CUSTOM_HEADERS_POSTFIX; public static final String CUSTOM_HEADERS_S3 = FS_S3A_CLIENT_PREFIX + Constants.AWS_SERVICE_IDENTIFIER_S3.toLowerCase() - + CUSTOM_HEADER_POSTFIX; + + CUSTOM_HEADERS_POSTFIX; public static final String S3N_FOLDER_SUFFIX = "_$folder$"; public static final String FS_S3A_BLOCK_SIZE = "fs.s3a.block.size"; From 4b26b505037f20f8a1713ed0ef7c7ed3cf0ae1df Mon Sep 17 00:00:00 2001 From: PradhanPrerak Date: Tue, 2 Jul 2024 16:29:38 -0700 Subject: [PATCH 5/7] retrigger checks From 0fba78f53e9745f8436d460a4ece86ddba9ceae8 Mon Sep 17 00:00:00 2001 From: PradhanPrerak Date: Wed, 3 Jul 2024 14:12:59 -0700 Subject: [PATCH 6/7] check style fixes --- .../org/apache/hadoop/fs/s3a/Constants.java | 2 +- .../hadoop/fs/s3a/impl/AWSClientConfig.java | 16 +++--- .../fs/s3a/impl/TestAwsClientConfig.java | 56 +++++++++++-------- 3 files changed, 41 insertions(+), 33 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index 6ebbac4eb6ae1..ec5140efec9de 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -778,7 +778,7 @@ private Constants() { /** Prefix for S3A client-specific properties. */ public static final String FS_S3A_CLIENT_PREFIX = "fs.s3a.client."; - /** Custom headers postfix */ + /** Custom headers postfix. */ public static final String CUSTOM_HEADERS_POSTFIX = ".custom.headers"; /** diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java index bc29ca99dcf0c..0f4f4ffd9fc42 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java @@ -424,14 +424,14 @@ private static void initRequestHeaders(Configuration conf, ClientOverrideConfiguration.Builder clientConfig, String awsServiceIdentifier) { String configKey = null; switch (awsServiceIdentifier) { - case AWS_SERVICE_IDENTIFIER_S3: - configKey = CUSTOM_HEADERS_S3; - break; - case AWS_SERVICE_IDENTIFIER_STS: - configKey = CUSTOM_HEADERS_STS; - break; - default: - // Nothing to do. The original signer override is already setup + case AWS_SERVICE_IDENTIFIER_S3: + configKey = CUSTOM_HEADERS_S3; + break; + case AWS_SERVICE_IDENTIFIER_STS: + configKey = CUSTOM_HEADERS_STS; + break; + default: + // Nothing to do. The original signer override is already setup } if (configKey != null) { Map awsClientCustomHeadersMap = diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestAwsClientConfig.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestAwsClientConfig.java index ed626b7806515..20c95be6926a2 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestAwsClientConfig.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestAwsClientConfig.java @@ -221,18 +221,22 @@ public void testInitRequestHeadersForSTS() throws IOException { .describedAs("Custom client headers for s3 %s", CUSTOM_HEADERS_S3) .isNull(); - Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_S3).headers().size()) - .describedAs("Count of S3 client headers") - .isEqualTo(0); - Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_STS).headers().size()) - .describedAs("Count of STS client headers") - .isEqualTo(2); - Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_STS).headers().get("foo")) - .describedAs("STS client 'foo' header value") - .isEqualTo(Lists.newArrayList("bar", "baz")); - Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_STS).headers().get("qux")) - .describedAs("STS client 'qux' header value") - .isEqualTo(Lists.newArrayList("quux")); + Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_S3) + .headers().size()) + .describedAs("Count of S3 client headers") + .isEqualTo(0); + Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_STS) + .headers().size()) + .describedAs("Count of STS client headers") + .isEqualTo(2); + Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_STS) + .headers().get("foo")) + .describedAs("STS client 'foo' header value") + .isEqualTo(Lists.newArrayList("bar", "baz")); + Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_STS) + .headers().get("qux")) + .describedAs("STS client 'qux' header value") + .isEqualTo(Lists.newArrayList("quux")); } /** @@ -247,17 +251,21 @@ public void testInitRequestHeadersForS3() throws IOException { .describedAs("Custom client headers for STS %s", CUSTOM_HEADERS_STS) .isNull(); - Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_STS).headers().size()) - .describedAs("Count of STS client headers") - .isEqualTo(0); - Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_S3).headers().size()) - .describedAs("Count of S3 client headers") - .isEqualTo(2); - Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_S3).headers().get("foo")) - .describedAs("S3 client 'foo' header value") - .isEqualTo(Lists.newArrayList("bar", "baz")); - Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_S3).headers().get("qux")) - .describedAs("S3 client 'qux' header value") - .isEqualTo(Lists.newArrayList("quux")); + Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_STS) + .headers().size()) + .describedAs("Count of STS client headers") + .isEqualTo(0); + Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_S3) + .headers().size()) + .describedAs("Count of S3 client headers") + .isEqualTo(2); + Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_S3) + .headers().get("foo")) + .describedAs("S3 client 'foo' header value") + .isEqualTo(Lists.newArrayList("bar", "baz")); + Assertions.assertThat(createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_S3) + .headers().get("qux")) + .describedAs("S3 client 'qux' header value") + .isEqualTo(Lists.newArrayList("quux")); } } From 67416252985ba907e8fdcf429d45d8daf0453aba Mon Sep 17 00:00:00 2001 From: PradhanPrerak Date: Wed, 14 Aug 2024 15:52:10 -0700 Subject: [PATCH 7/7] change delimiter to semicolon (;) and adress comments --- .../org/apache/hadoop/fs/s3a/Constants.java | 51 ++++++++++--------- .../hadoop/fs/s3a/impl/AWSClientConfig.java | 6 +-- .../fs/s3a/impl/TestAwsClientConfig.java | 4 +- 3 files changed, 33 insertions(+), 28 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index 99fde7db29bc2..3f58804e7e0e8 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -24,6 +24,7 @@ import org.apache.hadoop.security.ssl.DelegatingSSLSocketFactory; import java.time.Duration; +import java.util.Locale; import java.util.concurrent.TimeUnit; /** @@ -785,29 +786,6 @@ private Constants() { "fs.s3a." + Constants.AWS_SERVICE_IDENTIFIER_STS.toLowerCase() + ".signing-algorithm"; - /** Prefix for S3A client-specific properties. */ - public static final String FS_S3A_CLIENT_PREFIX = "fs.s3a.client."; - - /** Custom headers postfix. */ - public static final String CUSTOM_HEADERS_POSTFIX = ".custom.headers"; - - /** - * List of custom headers to be set on the service client. - * Multiple parameters can be used to specify custom headers. - * fs.s3a.client.s3.custom.headers - headers to add on all the s3 requests. - * fs.s3a.client.sts.custom.headers - headers to add on all the sts requests. - * Examples - * CustomHeader {@literal ->} 'Header1:Value1' - * CustomHeaders {@literal ->} 'Header1=Value1:Value2,Header2=Value1' - */ - public static final String CUSTOM_HEADERS_STS = - FS_S3A_CLIENT_PREFIX + Constants.AWS_SERVICE_IDENTIFIER_STS.toLowerCase() - + CUSTOM_HEADERS_POSTFIX; - - public static final String CUSTOM_HEADERS_S3 = - FS_S3A_CLIENT_PREFIX + Constants.AWS_SERVICE_IDENTIFIER_S3.toLowerCase() - + CUSTOM_HEADERS_POSTFIX; - public static final String S3N_FOLDER_SUFFIX = "_$folder$"; public static final String FS_S3A_BLOCK_SIZE = "fs.s3a.block.size"; public static final String FS_S3A = "s3a"; @@ -1275,6 +1253,33 @@ private Constants() { public static final String AWS_SERVICE_IDENTIFIER_DDB = "DDB"; public static final String AWS_SERVICE_IDENTIFIER_STS = "STS"; + /** Prefix for S3A client-specific properties. + * value: {@value} + */ + public static final String FS_S3A_CLIENT_PREFIX = "fs.s3a.client."; + + /** Custom headers postfix. + * value: {@value} + */ + public static final String CUSTOM_HEADERS_POSTFIX = ".custom.headers"; + + /** + * List of custom headers to be set on the service client. + * Multiple parameters can be used to specify custom headers. + * fs.s3a.client.s3.custom.headers - headers to add on all the s3 requests. + * fs.s3a.client.sts.custom.headers - headers to add on all the sts requests. + * Examples + * CustomHeader {@literal ->} 'Header1:Value1' + * CustomHeaders {@literal ->} 'Header1=Value1;Value2,Header2=Value1' + */ + public static final String CUSTOM_HEADERS_STS = + FS_S3A_CLIENT_PREFIX + AWS_SERVICE_IDENTIFIER_STS.toLowerCase(Locale.ROOT) + + CUSTOM_HEADERS_POSTFIX; + + public static final String CUSTOM_HEADERS_S3 = + FS_S3A_CLIENT_PREFIX + AWS_SERVICE_IDENTIFIER_S3.toLowerCase(Locale.ROOT) + + CUSTOM_HEADERS_POSTFIX; + /** * How long to wait for the thread pool to terminate when cleaning up. * Value: {@value} seconds. diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java index 0f4f4ffd9fc42..a99fa0f6b755d 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java @@ -415,7 +415,7 @@ private static void initSigner(Configuration conf, } /** - * + * Initialize custom request headers for AWS clients. * @param conf hadoop configuration * @param clientConfig client configuration to update * @param awsServiceIdentifier service name @@ -431,13 +431,13 @@ private static void initRequestHeaders(Configuration conf, configKey = CUSTOM_HEADERS_STS; break; default: - // Nothing to do. The original signer override is already setup + // No known service. } if (configKey != null) { Map awsClientCustomHeadersMap = S3AUtils.getTrimmedStringCollectionSplitByEquals(conf, configKey); awsClientCustomHeadersMap.forEach((header, valueString) -> { - List headerValues = Arrays.asList(valueString.split(":")); + List headerValues = Arrays.asList(valueString.split(";")); clientConfig.putHeader(header, headerValues); }); LOG.debug("headers for {} client = {}", awsServiceIdentifier, clientConfig.headers()); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestAwsClientConfig.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestAwsClientConfig.java index 20c95be6926a2..859544f0878d1 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestAwsClientConfig.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestAwsClientConfig.java @@ -216,7 +216,7 @@ private void setOptionsToValue(String value, Configuration conf, String... keys) @Test public void testInitRequestHeadersForSTS() throws IOException { final Configuration conf = new Configuration(); - conf.set(CUSTOM_HEADERS_STS, "foo=bar:baz,qux=quux"); + conf.set(CUSTOM_HEADERS_STS, "foo=bar;baz,qux=quux"); Assertions.assertThat(conf.get(CUSTOM_HEADERS_S3)) .describedAs("Custom client headers for s3 %s", CUSTOM_HEADERS_S3) .isNull(); @@ -246,7 +246,7 @@ public void testInitRequestHeadersForSTS() throws IOException { @Test public void testInitRequestHeadersForS3() throws IOException { final Configuration conf = new Configuration(); - conf.set(CUSTOM_HEADERS_S3, "foo=bar:baz,qux=quux"); + conf.set(CUSTOM_HEADERS_S3, "foo=bar;baz,qux=quux"); Assertions.assertThat(conf.get(CUSTOM_HEADERS_STS)) .describedAs("Custom client headers for STS %s", CUSTOM_HEADERS_STS) .isNull();