From 1b65fd374a79823c012a0fb50a701295f3150f16 Mon Sep 17 00:00:00 2001 From: Brian Loss Date: Wed, 21 Jul 2021 11:41:20 -0400 Subject: [PATCH 1/3] HADOOP-17811: Configure ExponentialRetryPolicy Allow ExponentialRetryPolicy to be fully configured from the hadoop configuration. Properties are already defined for this, so ensure that they are passed along when the retry policy is constructed and that documentation has been updated. --- .../fs/azurebfs/AzureBlobFileSystemStore.java | 2 +- .../services/ExponentialRetryPolicy.java | 11 ++++++ .../hadoop-azure/src/site/markdown/abfs.md | 24 +++++++++++- .../services/TestExponentialRetryPolicy.java | 38 +++++++++++++++++-- 4 files changed, 69 insertions(+), 6 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java index a3cd1532e675b..3a527f7f0c3c9 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java @@ -1524,7 +1524,7 @@ private void initializeClient(URI uri, String fileSystemName, private AbfsClientContext populateAbfsClientContext() { return new AbfsClientContextBuilder() .withExponentialRetryPolicy( - new ExponentialRetryPolicy(abfsConfiguration.getMaxIoRetries())) + new ExponentialRetryPolicy(abfsConfiguration)) .withAbfsCounters(abfsCounters) .withAbfsPerfTracker(abfsPerfTracker) .build(); diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ExponentialRetryPolicy.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ExponentialRetryPolicy.java index 9a75c78aa0612..89d99471a8214 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ExponentialRetryPolicy.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ExponentialRetryPolicy.java @@ -21,6 +21,7 @@ import java.util.Random; import java.net.HttpURLConnection; +import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; /** @@ -89,6 +90,16 @@ public ExponentialRetryPolicy(final int maxIoRetries) { DEFAULT_CLIENT_BACKOFF); } + /** + * Initializes a new instance of the {@link ExponentialRetryPolicy} class. + * + * @param conf The {@link AbfsConfiguration} from which to retrieve retry configuration. + */ + public ExponentialRetryPolicy(AbfsConfiguration conf) { + this(conf.getMaxIoRetries(), conf.getMinBackoffIntervalMilliseconds(), conf.getMaxBackoffIntervalMilliseconds(), + conf.getBackoffIntervalMilliseconds()); + } + /** * Initializes a new instance of the {@link ExponentialRetryPolicy} class. * diff --git a/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md b/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md index e8eae8c489ea1..9258fc7c6ad64 100644 --- a/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md +++ b/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md @@ -338,7 +338,7 @@ with the following configurations. retries. Default value is 5. * `fs.azure.oauth.token.fetch.retry.min.backoff.interval`: Minimum back-off interval. Added to the retry interval computed from delta backoff. By - default this si set as 0. Set the interval in milli seconds. + default this is set as 0. Set the interval in milli seconds. * `fs.azure.oauth.token.fetch.retry.max.backoff.interval`: Maximum back-off interval. Default value is 60000 (sixty seconds). Set the interval in milli seconds. @@ -803,6 +803,28 @@ Currently this is used only for the server call retry logic. Used within AbfsClient class as part of the ExponentialRetryPolicy. The value should be greater than or equal to 0. +`fs.azure.io.retry.min.backoff.interval`: Sets the minimum backoff interval for +retries of IO operations. Currently this is used only for the server call retry +logic. Used within AbfsClient class as part of the EponentialRetryPolicy. This +value indicates the smallest interval (in milliseconds) to wait before retrying +an IO operation. The default value is 3000 (3 seconds). + +`fs.azure.io.retry.max.backoff.interval`: Sets the maximum backoff interval for +retries of IO operations. Currently this is used only for the server call retry +logic. Used within AbfsClient class as part of the EponentialRetryPolicy. This +value indicates the largest interval (in milliseconds) to wait before retrying +an IO operation. The default value is 30000 (30 seconds). + +`fs.azure.io.retry.backoff.interval`: Sets the default backoff interval for +retries of IO operations. Currently this is used only for the server call retry +logic. Used within AbfsClient class as part of the EponentialRetryPolicy. This +value is used to compute a random delta between 80% and 120% of the specified +value. This random delta is then multiplied by an exponent of the current IO +retry number (i.e., the default is multiplied by `2^(retryNum - 1)`) and then +contstrained within the range of [`fs.azure.io.retry.min.backoff.interval`, +`fs.azure.io.retry.max.backoff.interval`] to determine the amount of time to +wait before the next IO retry attempt. The default value is 3000 (3 seconds). + `fs.azure.write.request.size`: To set the write buffer size. Specify the value in bytes. The value should be between 16384 to 104857600 both inclusive (16 KB to 100 MB). The default value will be 8388608 (8 MB). diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestExponentialRetryPolicy.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestExponentialRetryPolicy.java index e10419f148b25..1f427525c3f97 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestExponentialRetryPolicy.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestExponentialRetryPolicy.java @@ -18,6 +18,11 @@ package org.apache.hadoop.fs.azurebfs.services; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_BACKOFF_INTERVAL; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_MAX_BACKOFF_INTERVAL; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_MAX_IO_RETRIES; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_MIN_BACKOFF_INTERVAL; + import java.util.Random; import org.junit.Assert; @@ -57,12 +62,37 @@ public void testDifferentMaxIORetryCount() throws Exception { @Test public void testDefaultMaxIORetryCount() throws Exception { AbfsConfiguration abfsConfig = getAbfsConfig(); - Assert.assertTrue( + Assert.assertEquals( String.format("default maxIORetry count is %s.", maxRetryCount), - abfsConfig.getMaxIoRetries() == maxRetryCount); + maxRetryCount, abfsConfig.getMaxIoRetries()); testMaxIOConfig(abfsConfig); } + @Test + public void testAbfsConfigConstructor() throws Exception { + // Ensure we choose expected values that are not defaults + ExponentialRetryPolicy template = new ExponentialRetryPolicy( + getAbfsConfig().getMaxIoRetries()); + int expectedMaxRetries = template.getRetryCount() + 5; + int expectedMinBackoff = template.getMinBackoff() + 2000; + int expectedMaxBackoff = template.getMaxBackoff() + 10000; + int expectedDeltaBackoff = template.getDeltaBackoff() + 3000; + + Configuration config = new Configuration(this.getRawConfiguration()); + config.setInt(AZURE_MAX_IO_RETRIES, expectedMaxRetries); + config.setInt(AZURE_MIN_BACKOFF_INTERVAL, expectedMinBackoff); + config.setInt(AZURE_MAX_BACKOFF_INTERVAL, expectedMaxBackoff); + config.setInt(AZURE_BACKOFF_INTERVAL, expectedDeltaBackoff); + + ExponentialRetryPolicy policy = new ExponentialRetryPolicy( + new AbfsConfiguration(config, "dummyAccountName")); + + Assert.assertEquals(expectedMaxRetries, policy.getRetryCount()); + Assert.assertEquals(expectedMinBackoff, policy.getMinBackoff()); + Assert.assertEquals(expectedMaxBackoff, policy.getMaxBackoff()); + Assert.assertEquals(expectedDeltaBackoff, policy.getDeltaBackoff()); + } + private AbfsConfiguration getAbfsConfig() throws Exception { Configuration config = new Configuration(this.getRawConfiguration()); @@ -81,8 +111,8 @@ private void testMaxIOConfig(AbfsConfiguration abfsConfig) { localRetryCount++; } - Assert.assertTrue( + Assert.assertEquals( "When all retries are exhausted, the retryCount will be same as max configured", - localRetryCount == abfsConfig.getMaxIoRetries()); + abfsConfig.getMaxIoRetries(), localRetryCount); } } From 2e746c98b0ed1fa4c937605102c92847d1712a76 Mon Sep 17 00:00:00 2001 From: Brian Loss Date: Thu, 22 Jul 2021 08:31:11 -0400 Subject: [PATCH 2/3] Fix spotbugs/checkstyle issues * Add spotbugs exclusion for IS2_INCONSISTENT_SYNC on NativeAzureFsInputStream.in to fix errant bug trigger introduced in PR #3149. * Refactor TestExponentialRetryPolicy to use a variable with the value offset rather than hard-coded numbers in order to satisfy checkstyle. --- .../hadoop-azure/dev-support/findbugs-exclude.xml | 8 ++++++++ .../azurebfs/services/TestExponentialRetryPolicy.java | 10 +++++----- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/hadoop-tools/hadoop-azure/dev-support/findbugs-exclude.xml b/hadoop-tools/hadoop-azure/dev-support/findbugs-exclude.xml index b750b8b91c79e..cc8a460f83905 100644 --- a/hadoop-tools/hadoop-azure/dev-support/findbugs-exclude.xml +++ b/hadoop-tools/hadoop-azure/dev-support/findbugs-exclude.xml @@ -83,4 +83,12 @@ + + + + + + diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestExponentialRetryPolicy.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestExponentialRetryPolicy.java index 1f427525c3f97..cb71ed9d6d850 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestExponentialRetryPolicy.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestExponentialRetryPolicy.java @@ -37,7 +37,6 @@ * Unit test TestExponentialRetryPolicy. */ public class TestExponentialRetryPolicy extends AbstractAbfsIntegrationTest { - private final int maxRetryCount = 30; private final int noRetryCount = 0; private final int retryCount = new Random().nextInt(maxRetryCount); @@ -73,10 +72,11 @@ public void testAbfsConfigConstructor() throws Exception { // Ensure we choose expected values that are not defaults ExponentialRetryPolicy template = new ExponentialRetryPolicy( getAbfsConfig().getMaxIoRetries()); - int expectedMaxRetries = template.getRetryCount() + 5; - int expectedMinBackoff = template.getMinBackoff() + 2000; - int expectedMaxBackoff = template.getMaxBackoff() + 10000; - int expectedDeltaBackoff = template.getDeltaBackoff() + 3000; + int testModifier = 1; + int expectedMaxRetries = template.getRetryCount() + testModifier; + int expectedMinBackoff = template.getMinBackoff() + testModifier; + int expectedMaxBackoff = template.getMaxBackoff() + testModifier; + int expectedDeltaBackoff = template.getDeltaBackoff() + testModifier; Configuration config = new Configuration(this.getRawConfiguration()); config.setInt(AZURE_MAX_IO_RETRIES, expectedMaxRetries); From 6fa05970af3ca46b2145e7b554fed53b023eadf7 Mon Sep 17 00:00:00 2001 From: Brian Loss Date: Tue, 27 Jul 2021 14:08:06 -0400 Subject: [PATCH 3/3] HADOOP-17811: Implement PR feedback * Update doc formatting as requested to include backticks on class name * Fix some spelling errors in testing_azure.md * Add assertion failure messages for new test case --- hadoop-tools/hadoop-azure/src/site/markdown/abfs.md | 10 +++++----- .../hadoop-azure/src/site/markdown/testing_azure.md | 4 ++-- .../azurebfs/services/TestExponentialRetryPolicy.java | 8 ++++---- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md b/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md index 9258fc7c6ad64..dfb7f3f42a5cf 100644 --- a/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md +++ b/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md @@ -800,24 +800,24 @@ The following configs are related to read and write operations. `fs.azure.io.retry.max.retries`: Sets the number of retries for IO operations. Currently this is used only for the server call retry logic. Used within -AbfsClient class as part of the ExponentialRetryPolicy. The value should be +`AbfsClient` class as part of the ExponentialRetryPolicy. The value should be greater than or equal to 0. `fs.azure.io.retry.min.backoff.interval`: Sets the minimum backoff interval for retries of IO operations. Currently this is used only for the server call retry -logic. Used within AbfsClient class as part of the EponentialRetryPolicy. This +logic. Used within `AbfsClient` class as part of the ExponentialRetryPolicy. This value indicates the smallest interval (in milliseconds) to wait before retrying an IO operation. The default value is 3000 (3 seconds). `fs.azure.io.retry.max.backoff.interval`: Sets the maximum backoff interval for retries of IO operations. Currently this is used only for the server call retry -logic. Used within AbfsClient class as part of the EponentialRetryPolicy. This +logic. Used within `AbfsClient` class as part of the ExponentialRetryPolicy. This value indicates the largest interval (in milliseconds) to wait before retrying an IO operation. The default value is 30000 (30 seconds). `fs.azure.io.retry.backoff.interval`: Sets the default backoff interval for retries of IO operations. Currently this is used only for the server call retry -logic. Used within AbfsClient class as part of the EponentialRetryPolicy. This +logic. Used within `AbfsClient` class as part of the ExponentialRetryPolicy. This value is used to compute a random delta between 80% and 120% of the specified value. This random delta is then multiplied by an exponent of the current IO retry number (i.e., the default is multiplied by `2^(retryNum - 1)`) and then @@ -881,7 +881,7 @@ when there are too many writes from the same process. ### Security Options `fs.azure.always.use.https`: Enforces to use HTTPS instead of HTTP when the flag -is made true. Irrespective of the flag, AbfsClient will use HTTPS if the secure +is made true. Irrespective of the flag, `AbfsClient` will use HTTPS if the secure scheme (ABFSS) is used or OAuth is used for authentication. By default this will be set to true. diff --git a/hadoop-tools/hadoop-azure/src/site/markdown/testing_azure.md b/hadoop-tools/hadoop-azure/src/site/markdown/testing_azure.md index cf3b2344456af..933f86be3e896 100644 --- a/hadoop-tools/hadoop-azure/src/site/markdown/testing_azure.md +++ b/hadoop-tools/hadoop-azure/src/site/markdown/testing_azure.md @@ -448,7 +448,7 @@ use requires the presence of secret credentials, where tests may be slow, and where finding out why something failed from nothing but the test output is critical. -#### Subclasses Existing Shared Base Blasses +#### Subclasses Existing Shared Base Classes There are a set of base classes which should be extended for Azure tests and integration tests. @@ -602,7 +602,7 @@ various test combinations, it will: 2. Run tests for all combinations 3. Summarize results across all the test combination runs. -As a pre-requiste step, fill config values for test accounts and credentials +As a pre-requisite step, fill config values for test accounts and credentials needed for authentication in `src/test/resources/azure-auth-keys.xml.template` and rename as `src/test/resources/azure-auth-keys.xml`. diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestExponentialRetryPolicy.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestExponentialRetryPolicy.java index cb71ed9d6d850..0f8dc55aa14a4 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestExponentialRetryPolicy.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestExponentialRetryPolicy.java @@ -87,10 +87,10 @@ public void testAbfsConfigConstructor() throws Exception { ExponentialRetryPolicy policy = new ExponentialRetryPolicy( new AbfsConfiguration(config, "dummyAccountName")); - Assert.assertEquals(expectedMaxRetries, policy.getRetryCount()); - Assert.assertEquals(expectedMinBackoff, policy.getMinBackoff()); - Assert.assertEquals(expectedMaxBackoff, policy.getMaxBackoff()); - Assert.assertEquals(expectedDeltaBackoff, policy.getDeltaBackoff()); + Assert.assertEquals("Max retry count was not set as expected.", expectedMaxRetries, policy.getRetryCount()); + Assert.assertEquals("Min backoff interval was not set as expected.", expectedMinBackoff, policy.getMinBackoff()); + Assert.assertEquals("Max backoff interval was not set as expected.", expectedMaxBackoff, policy.getMaxBackoff()); + Assert.assertEquals("Delta backoff interval was not set as expected.", expectedDeltaBackoff, policy.getDeltaBackoff()); } private AbfsConfiguration getAbfsConfig() throws Exception {