From 4acc11d141254ba19dce483e1321983ad1f486a1 Mon Sep 17 00:00:00 2001
From: bhattmanish98
Date: Tue, 30 Sep 2025 23:05:01 -0700
Subject: [PATCH 01/10] Metrics Changes: Ways to emit aggregated metrics +
 reduce the number of metric calls

---
 hadoop-tools/hadoop-azure/pom.xml             |   1 +
 .../hadoop/fs/azurebfs/AbfsConfiguration.java |  60 +-
 .../hadoop/fs/azurebfs/AbfsCountersImpl.java  |  25 +-
 .../fs/azurebfs/AzureBlobFileSystem.java      |  13 +-
 .../fs/azurebfs/AzureBlobFileSystemStore.java |  12 +-
 .../azurebfs/constants/ConfigurationKeys.java |   9 +-
 .../constants/FileSystemConfigurations.java   |   6 +
 .../azurebfs/services/AbfsBackoffMetrics.java | 182 ++--
 .../fs/azurebfs/services/AbfsBlobClient.java  |   5 +-
 .../fs/azurebfs/services/AbfsClient.java      | 269 ++++--
 .../azurebfs/services/AbfsClientHandler.java  |  17 +-
 .../fs/azurebfs/services/AbfsDfsClient.java   |   5 +-
 .../services/AbfsReadFooterMetrics.java       | 833 ++++++++++--------
 .../azurebfs/services/AbfsRestOperation.java  |  16 +-
 .../AbstractAbfsStatisticsSource.java         |  24 +
 .../services/AggregateMetricsManager.java     | 179 ++++
 .../fs/azurebfs/services/KeepAliveCache.java  |   5 +
 .../fs/azurebfs/services/MetricsBucket.java   | 178 ++++
 .../fs/azurebfs/utils/MetricFormat.java       |   7 +-
 .../utils/ResourceUtilizationUtils.java       |   3 +-
 .../fs/azurebfs/utils/SimpleRateLimiter.java  |  73 ++
 .../fs/azurebfs/utils/TracingContext.java     |  10 +
 .../azurebfs/utils/TracingHeaderFormat.java   |   5 +-
 .../azurebfs/utils/TracingHeaderVersion.java  |  10 +-
 .../azurebfs/ITestAbfsReadFooterMetrics.java  |  27 +-
 .../fs/azurebfs/services/ITestAbfsClient.java | 329 ++++++-
 .../fs/azurebfs/services/TestAbfsClient.java  |  19 +-
 .../services/TestAbfsRenameRetryRecovery.java |   2 +-
 .../services/TestAbfsRestOperation.java       |  10 -
 .../services/TestAggregateMetricsManager.java | 708 +++++++++++++++
 .../TestApacheHttpClientFallback.java         |   3 +-
 .../azurebfs/utils/TestSimpleRateLimiter.java | 144 +++
 32 files changed, 2534 insertions(+), 655 deletions(-)
 create mode 100644 hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AggregateMetricsManager.java
 create mode 100644 hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/MetricsBucket.java
 create mode 100644 hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/SimpleRateLimiter.java
 create mode 100644 hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAggregateMetricsManager.java
 create mode 100644 hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/TestSimpleRateLimiter.java

diff --git a/hadoop-tools/hadoop-azure/pom.xml b/hadoop-tools/hadoop-azure/pom.xml
index d0bfc6852befe..4234128787e8d 100644
--- a/hadoop-tools/hadoop-azure/pom.xml
+++ b/hadoop-tools/hadoop-azure/pom.xml
@@ -29,6 +29,7 @@
     Currently this consists of a filesystem client to read data from
     and write data to Azure Storage.
+ 3.5.0-METRICS jar diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java index e7591292c919a..1c01cdce3edd5 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java @@ -79,7 +79,9 @@ import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.DOT; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EMPTY_STRING; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.*; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRIC_EMIT_INTERVAL_MINS; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.*; +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DEFAULT_METRIC_EMIT_INTERVAL_MINS; import static org.apache.hadoop.fs.azurebfs.services.AbfsErrors.INCORRECT_INGRESS_TYPE; /** @@ -324,10 +326,6 @@ public class AbfsConfiguration{ DefaultValue = DEFAULT_METRIC_ANALYSIS_TIMEOUT_MS) private int metricAnalysisTimeout; - @StringConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_METRIC_URI, - DefaultValue = EMPTY_STRING) - private String metricUri; - @StringConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_METRIC_ACCOUNT_NAME, DefaultValue = EMPTY_STRING) private String metricAccount; @@ -336,6 +334,30 @@ public class AbfsConfiguration{ DefaultValue = EMPTY_STRING) private String metricAccountKey; + @BooleanConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_METRICS_COLLECTION_ENABLED, + DefaultValue = DEFAULT_METRICS_COLLECTION_ENABLED) + private boolean metricsCollectionEnabled; + + @BooleanConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_SHOULD_EMIT_METRICS_ON_IDLE_TIME, + DefaultValue = DEFAULT_SHOULD_EMIT_METRICS_ON_IDLE_TIME) + private boolean shouldEmitMetricsOnIdleTime; + + @LongConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_METRIC_EMIT_THRESHOLD, + DefaultValue = DEFAULT_METRIC_EMIT_THRESHOLD) + private long metricEmitThreshold; + + @LongConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_METRICS_EMIT_THRESHOLD_INTERVAL_SECS, + DefaultValue = DEFAULT_METRICS_EMIT_THRESHOLD_INTERVAL_SECS) + private long metricsEmitThresholdIntervalInSecs; + + @LongConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_METRIC_EMIT_INTERVAL_MINS, + DefaultValue = DEFAULT_METRIC_EMIT_INTERVAL_MINS) + private long metricEmitIntervalInMins; + + @IntegerConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_MAX_METRICS_CALLS_PER_SECOND, + DefaultValue = DEFAULT_MAX_METRICS_CALLS_PER_SECOND) + private int maxMetricsCallsPerSecond; + @IntegerConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_ACCOUNT_OPERATION_IDLE_TIMEOUT, DefaultValue = DEFAULT_ACCOUNT_OPERATION_IDLE_TIMEOUT_MS) private int accountOperationIdleTimeout; @@ -1290,10 +1312,6 @@ public int getMetricAnalysisTimeout() { return this.metricAnalysisTimeout; } - public String getMetricUri() { - return metricUri; - } - public String getMetricAccount() { return metricAccount; } @@ -1302,6 +1320,30 @@ public String getMetricAccountKey() { return metricAccountKey; } + public boolean isMetricsCollectionEnabled() { + return metricsCollectionEnabled; + } + + public boolean shouldEmitMetricsOnIdleTime() { + return shouldEmitMetricsOnIdleTime; + } + + public 
long getMetricEmitThreshold() {
+    return metricEmitThreshold;
+  }
+
+  public long getMetricEmitIntervalInMins() {
+    return metricEmitIntervalInMins;
+  }
+
+  public long getMetricsEmitThresholdIntervalInSecs() {
+    return metricsEmitThresholdIntervalInSecs;
+  }
+
+  public int getMaxMetricsCallsPerSecond() {
+    return maxMetricsCallsPerSecond;
+  }
+
   public int getAccountOperationIdleTimeout() {
     return accountOperationIdleTimeout;
   }
@@ -1390,7 +1432,7 @@ public TracingHeaderFormat getTracingHeaderFormat() {
   }
 
   public MetricFormat getMetricFormat() {
-    return getEnum(FS_AZURE_METRIC_FORMAT, MetricFormat.EMPTY);
+    return getEnum(FS_AZURE_METRIC_FORMAT, MetricFormat.INTERNAL_METRIC_FORMAT);
   }
 
   public AuthType getAuthType(String accountName) {
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsCountersImpl.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsCountersImpl.java
index 8bc7d5017bc43..8509caaa2891c 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsCountersImpl.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsCountersImpl.java
@@ -207,17 +207,34 @@ public void initializeMetrics(MetricFormat metricFormat) {
       abfsBackoffMetrics = new AbfsBackoffMetrics();
       break;
     case INTERNAL_FOOTER_METRIC_FORMAT:
-      abfsReadFooterMetrics = new AbfsReadFooterMetrics();
+      initializeReadFooterMetrics();
       break;
     case INTERNAL_METRIC_FORMAT:
       abfsBackoffMetrics = new AbfsBackoffMetrics();
-      abfsReadFooterMetrics = new AbfsReadFooterMetrics();
+      initializeReadFooterMetrics();
       break;
     default:
       break;
     }
   }
 
+  /**
+   * Initialize the read footer metrics.
+   * In case the metrics are already initialized,
+   * create a new instance with the existing map.
+   */
+  private void initializeReadFooterMetrics() {
+    if (abfsReadFooterMetrics == null) {
+      abfsReadFooterMetrics = new AbfsReadFooterMetrics();
+    } else {
+      // When metrics are emitted based on the total request count, the file
+      // types computed so far would otherwise be lost. To avoid that, create
+      // a new instance seeded with the existing file-type map.
+      abfsReadFooterMetrics = new AbfsReadFooterMetrics(
+          abfsReadFooterMetrics.getFileTypeMetricsMap());
+    }
+  }
+
   /**
    * Look up a Metric from registered set.
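The new knobs above are plain Hadoop configuration entries. A minimal sketch of wiring them up through org.apache.hadoop.conf.Configuration follows; the key strings are the ones this patch adds to ConfigurationKeys, while the values shown are illustrative, not recommendations.

    import org.apache.hadoop.conf.Configuration;

    public class MetricsConfigExample {
      // Returns a Configuration with the new aggregated-metrics knobs set.
      public static Configuration withMetricsTuning() {
        Configuration conf = new Configuration();
        conf.setBoolean("fs.azure.metrics.collection.enabled", true);
        // Emit early once 50k requests have been recorded...
        conf.setLong("fs.azure.metric.emit.threshold", 50_000L);
        // ...checking that threshold every 30 seconds...
        conf.setLong("fs.azure.metrics.emit.threshold.interval.secs", 30L);
        // ...and flushing unconditionally every 60 minutes.
        conf.setLong("fs.azure.metric.emit.interval.mins", 60L);
        // Cap the metric HEAD calls leaving this process per second.
        conf.setInt("fs.azure.max.metrics.calls.per.second", 3);
        return conf;
      }
    }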
* @@ -375,8 +391,7 @@ public DurationTracker trackDuration(String key) { public String toString() { String metric = ""; if (abfsBackoffMetrics != null) { - long totalNoRequests = getAbfsBackoffMetrics().getMetricValue(TOTAL_NUMBER_OF_REQUESTS); - if (totalNoRequests > 0) { + if (getAbfsBackoffMetrics().getMetricValue(TOTAL_NUMBER_OF_REQUESTS) > 0) { metric += "#BO:" + getAbfsBackoffMetrics().toString(); } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java index 572bc873b1c2e..655d79fa66423 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java @@ -215,6 +215,7 @@ public void initialize(URI uri, Configuration configuration) .withBlockFactory(blockFactory) .withBlockOutputActiveBlocks(blockOutputActiveBlocks) .withBackReference(new BackReference(this)) + .withFileSystemId(this.fileSystemId) .build(); this.abfsStore = new AzureBlobFileSystemStore(systemStoreBuilder); @@ -828,18 +829,6 @@ public synchronized void close() throws IOException { if (isClosed()) { return; } - if (getAbfsStore().getClient().isMetricCollectionEnabled()) { - TracingContext tracingMetricContext = new TracingContext( - clientCorrelationId, - fileSystemId, FSOperationType.GET_ATTR, true, - tracingHeaderFormat, - listener, abfsCounters.toString()); - try { - getAbfsClient().getMetricCall(tracingMetricContext); - } catch (IOException e) { - throw new IOException(e); - } - } // does all the delete-on-exit calls, and may be slow. super.close(); LOG.debug("AzureBlobFileSystem.close"); diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java index 5d7d0895d0223..296b8c0ed130a 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java @@ -258,7 +258,7 @@ public AzureBlobFileSystemStore( boolean useHttps = (usingOauth || abfsConfiguration.isHttpsAlwaysUsed()) ? 
true : abfsStoreBuilder.isSecureScheme; this.abfsPerfTracker = new AbfsPerfTracker(fileSystemName, accountName, this.abfsConfiguration); this.abfsCounters = abfsStoreBuilder.abfsCounters; - initializeClient(uri, fileSystemName, accountName, useHttps); + initializeClient(uri, fileSystemName, accountName, useHttps, abfsStoreBuilder.fileSystemId); final Class identityTransformerClass = abfsStoreBuilder.configuration.getClass(FS_AZURE_IDENTITY_TRANSFORM_CLASS, IdentityTransformer.class, IdentityTransformerInterface.class); @@ -1717,7 +1717,7 @@ public boolean isInfiniteLeaseKey(String key) { * @throws IOException */ private void initializeClient(URI uri, String fileSystemName, - String accountName, boolean isSecure) + String accountName, boolean isSecure, String fileSystemId) throws IOException { if (this.getClient() != null) { return; @@ -1795,7 +1795,7 @@ private void initializeClient(URI uri, String fileSystemName, this.clientHandler = new AbfsClientHandler(baseUrl, creds, abfsConfiguration, tokenProvider, sasTokenProvider, encryptionContextProvider, - populateAbfsClientContext()); + populateAbfsClientContext(), fileSystemId); this.setClient(getClientHandler().getClient()); LOG.trace("AbfsClient init complete"); @@ -1966,6 +1966,7 @@ public static final class AzureBlobFileSystemStoreBuilder { private DataBlocks.BlockFactory blockFactory; private int blockOutputActiveBlocks; private BackReference fsBackRef; + private String fileSystemId; public AzureBlobFileSystemStoreBuilder withUri(URI value) { this.uri = value; @@ -2007,6 +2008,11 @@ public AzureBlobFileSystemStoreBuilder withBackReference( return this; } + public AzureBlobFileSystemStoreBuilder withFileSystemId(String fileSystemId) { + this.fileSystemId = fileSystemId; + return this; + } + public AzureBlobFileSystemStoreBuilder build() { return this; } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java index 3de55adcdabf1..e6e4b4d382096 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java @@ -71,7 +71,13 @@ public final class ConfigurationKeys { public static final String FS_AZURE_ACCOUNT_KEY_PROPERTY_NAME = "fs.azure.account.key"; public static final String FS_AZURE_METRIC_ACCOUNT_NAME = "fs.azure.metric.account.name"; public static final String FS_AZURE_METRIC_ACCOUNT_KEY = "fs.azure.metric.account.key"; - public static final String FS_AZURE_METRIC_URI = "fs.azure.metric.uri"; + public static final String FS_AZURE_METRIC_FORMAT = "fs.azure.metric.format"; + public static final String FS_AZURE_METRICS_COLLECTION_ENABLED = "fs.azure.metrics.collection.enabled"; + public static final String FS_AZURE_SHOULD_EMIT_METRICS_ON_IDLE_TIME = "fs.azure.should.emit.metrics.on.idle.time"; + public static final String FS_AZURE_METRIC_EMIT_THRESHOLD = "fs.azure.metric.emit.threshold"; + public static final String FS_AZURE_METRICS_EMIT_THRESHOLD_INTERVAL_SECS = "fs.azure.metrics.emit.threshold.interval.secs"; + public static final String FS_AZURE_METRIC_EMIT_INTERVAL_MINS = "fs.azure.metric.emit.interval.mins"; + public static final String FS_AZURE_MAX_METRICS_CALLS_PER_SECOND = "fs.azure.max.metrics.calls.per.second"; public static final String FS_AZURE_ACCOUNT_KEY_PROPERTY_NAME_REGX = 
"fs\\.azure\\.account\\.key\\.(.*)"; public static final String FS_AZURE_SECURE_MODE = "fs.azure.secure.mode"; @@ -231,7 +237,6 @@ public final class ConfigurationKeys { * character constraints are not satisfied. **/ public static final String FS_AZURE_CLIENT_CORRELATIONID = "fs.azure.client.correlationid"; public static final String FS_AZURE_TRACINGHEADER_FORMAT = "fs.azure.tracingheader.format"; - public static final String FS_AZURE_METRIC_FORMAT = "fs.azure.metric.format"; public static final String FS_AZURE_CLUSTER_NAME = "fs.azure.cluster.name"; public static final String FS_AZURE_CLUSTER_TYPE = "fs.azure.cluster.type"; public static final String FS_AZURE_SSL_CHANNEL_MODE_KEY = "fs.azure.ssl.channel.mode"; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java index fb336da51966d..a8eba59e0f521 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java @@ -136,6 +136,12 @@ public final class FileSystemConfigurations { public static final boolean DEFAULT_ENABLE_AUTOTHROTTLING = false; public static final int DEFAULT_METRIC_IDLE_TIMEOUT_MS = 60_000; public static final int DEFAULT_METRIC_ANALYSIS_TIMEOUT_MS = 60_000; + public static final boolean DEFAULT_METRICS_COLLECTION_ENABLED = true; + public static final boolean DEFAULT_SHOULD_EMIT_METRICS_ON_IDLE_TIME = false; + public static final long DEFAULT_METRIC_EMIT_THRESHOLD = 100_000L; + public static final long DEFAULT_METRICS_EMIT_THRESHOLD_INTERVAL_SECS = 60; + public static final long DEFAULT_METRIC_EMIT_INTERVAL_MINS = 2 * 60; + public static final int DEFAULT_MAX_METRICS_CALLS_PER_SECOND = 3; public static final boolean DEFAULT_FS_AZURE_ACCOUNT_LEVEL_THROTTLING_ENABLED = true; public static final int DEFAULT_ACCOUNT_OPERATION_IDLE_TIMEOUT_MS = 60_000; public static final int DEFAULT_ANALYSIS_PERIOD_MS = 10_000; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBackoffMetrics.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBackoffMetrics.java index 84bb7b77f4a59..b78ed31338bd7 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBackoffMetrics.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBackoffMetrics.java @@ -33,7 +33,6 @@ import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EMPTY_STRING; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.COLON; -import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EQUAL; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.THOUSAND; import static org.apache.hadoop.fs.azurebfs.constants.MetricsConstants.DOUBLE_PRECISION_FORMAT; import static org.apache.hadoop.fs.azurebfs.constants.MetricsConstants.RETRY; @@ -75,18 +74,21 @@ * retry operations in Azure Blob File System (ABFS). 
*/ public class AbfsBackoffMetrics extends AbstractAbfsStatisticsSource { - private static final Logger LOG = LoggerFactory.getLogger(AbfsBackoffMetrics.class); + + private static final Logger LOG = LoggerFactory.getLogger( + AbfsBackoffMetrics.class); + private static final List RETRY_LIST = Arrays.asList( - RetryValue.values()); + RetryValue.values()); /** * Constructor to initialize the IOStatisticsStore with counters and gauges. */ public AbfsBackoffMetrics() { IOStatisticsStore ioStatisticsStore = iostatisticsStore() - .withCounters(getMetricNames(TYPE_COUNTER)) - .withGauges(getMetricNames(TYPE_GAUGE)) - .build(); + .withCounters(getMetricNames(TYPE_COUNTER)) + .withGauges(getMetricNames(TYPE_GAUGE)) + .build(); setIOStatistics(ioStatisticsStore); } @@ -98,15 +100,15 @@ public AbfsBackoffMetrics() { */ private String[] getMetricNames(StatisticTypeEnum type) { return Arrays.stream(AbfsBackoffMetricsEnum.values()) - .filter(backoffMetricsEnum -> backoffMetricsEnum - .getStatisticType() - .equals(type)) - .flatMap(backoffMetricsEnum -> - RETRY.equals(backoffMetricsEnum.getType()) - ? RETRY_LIST.stream().map(retryCount -> - getMetricName(backoffMetricsEnum, retryCount)) - : Stream.of(backoffMetricsEnum.getName()) - ).toArray(String[]::new); + .filter(backoffMetricsEnum -> backoffMetricsEnum + .getStatisticType() + .equals(type)) + .flatMap(backoffMetricsEnum -> + RETRY.equals(backoffMetricsEnum.getType()) + ? RETRY_LIST.stream().map(retryCount -> + getMetricName(backoffMetricsEnum, retryCount)) + : Stream.of(backoffMetricsEnum.getName()) + ).toArray(String[]::new); } /** @@ -116,7 +118,8 @@ private String[] getMetricNames(StatisticTypeEnum type) { * @param retryValue the retry value * @return the constructed metric name */ - private String getMetricName(AbfsBackoffMetricsEnum metric, RetryValue retryValue) { + private String getMetricName(AbfsBackoffMetricsEnum metric, + RetryValue retryValue) { if (metric == null) { LOG.error("ABFS Backoff Metric should not be null"); return EMPTY_STRING; @@ -134,15 +137,16 @@ private String getMetricName(AbfsBackoffMetricsEnum metric, RetryValue retryValu * @param retryValue the retry value * @return the value of the metric */ - public long getMetricValue(AbfsBackoffMetricsEnum metric, RetryValue retryValue) { + public long getMetricValue(AbfsBackoffMetricsEnum metric, + RetryValue retryValue) { String metricName = getMetricName(metric, retryValue); switch (metric.getStatisticType()) { - case TYPE_COUNTER: - return lookupCounterValue(metricName); - case TYPE_GAUGE: - return lookupGaugeValue(metricName); - default: - return 0; + case TYPE_COUNTER: + return lookupCounterValue(metricName); + case TYPE_GAUGE: + return lookupGaugeValue(metricName); + default: + return 0; } } @@ -162,18 +166,19 @@ public long getMetricValue(AbfsBackoffMetricsEnum metric) { * @param metric the metric enum * @param retryValue the retry value */ - public void incrementMetricValue(AbfsBackoffMetricsEnum metric, RetryValue retryValue) { + public void incrementMetricValue(AbfsBackoffMetricsEnum metric, + RetryValue retryValue) { String metricName = getMetricName(metric, retryValue); switch (metric.getStatisticType()) { - case TYPE_COUNTER: - incCounterValue(metricName); - break; - case TYPE_GAUGE: - incGaugeValue(metricName); - break; - default: - // Do nothing - break; + case TYPE_COUNTER: + incCounterValue(metricName); + break; + case TYPE_GAUGE: + incGaugeValue(metricName); + break; + default: + // Do nothing + break; } } @@ -193,18 +198,20 @@ public void 
incrementMetricValue(AbfsBackoffMetricsEnum metric) { * @param value the new value of the metric * @param retryValue the retry value */ - public void setMetricValue(AbfsBackoffMetricsEnum metric, long value, RetryValue retryValue) { + public void setMetricValue(AbfsBackoffMetricsEnum metric, + long value, + RetryValue retryValue) { String metricName = getMetricName(metric, retryValue); switch (metric.getStatisticType()) { - case TYPE_COUNTER: - setCounterValue(metricName, value); - break; - case TYPE_GAUGE: - setGaugeValue(metricName, value); - break; - default: - // Do nothing - break; + case TYPE_COUNTER: + setCounterValue(metricName, value); + break; + case TYPE_GAUGE: + setGaugeValue(metricName, value); + break; + default: + // Do nothing + break; } } @@ -227,9 +234,10 @@ public void setMetricValue(AbfsBackoffMetricsEnum metric, long value) { * @return String metrics value with precision */ private String getPrecisionMetrics(AbfsBackoffMetricsEnum metricName, - RetryValue retryCount, - long denominator) { - return format(DOUBLE_PRECISION_FORMAT, (double) getMetricValue(metricName, retryCount) / denominator); + RetryValue retryCount, + long denominator) { + return format(DOUBLE_PRECISION_FORMAT, + (double) getMetricValue(metricName, retryCount) / denominator); } /** @@ -240,25 +248,24 @@ private String getPrecisionMetrics(AbfsBackoffMetricsEnum metricName, private void getRetryMetrics(StringBuilder metricBuilder) { for (RetryValue retryCount : RETRY_LIST) { long totalRequests = getMetricValue(TOTAL_REQUESTS, retryCount); - metricBuilder.append(REQUEST_COUNT) - .append(retryCount.getValue()) - .append(REQUESTS) - .append(getMetricValue(NUMBER_OF_REQUESTS_SUCCEEDED, retryCount)); + if (getMetricValue(NUMBER_OF_REQUESTS_SUCCEEDED, retryCount) > 0) { + metricBuilder.append(REQUEST_COUNT) + .append(retryCount.getValue()) + .append(REQUESTS) + .append(getMetricValue(NUMBER_OF_REQUESTS_SUCCEEDED, retryCount)); + } if (totalRequests > 0) { metricBuilder.append(MIN_MAX_AVERAGE) - .append(retryCount.getValue()) - .append(REQUESTS) - .append(getPrecisionMetrics(MIN_BACK_OFF, retryCount, THOUSAND)) - .append(SECONDS) - .append(getPrecisionMetrics(MAX_BACK_OFF, retryCount, THOUSAND)) - .append(SECONDS) - .append(getPrecisionMetrics(TOTAL_BACK_OFF, retryCount, totalRequests * THOUSAND)) - .append(SECONDS); - } else { - metricBuilder.append(MIN_MAX_AVERAGE) - .append(retryCount.getValue()) - .append(REQUESTS + EQUAL + 0 + SECONDS); + .append(retryCount.getValue()) + .append(REQUESTS) + .append(getPrecisionMetrics(MIN_BACK_OFF, retryCount, THOUSAND)) + .append(SECONDS) + .append(getPrecisionMetrics(MAX_BACK_OFF, retryCount, THOUSAND)) + .append(SECONDS) + .append(getPrecisionMetrics(TOTAL_BACK_OFF, retryCount, + totalRequests * THOUSAND)) + .append(SECONDS); } } } @@ -269,29 +276,42 @@ private void getRetryMetrics(StringBuilder metricBuilder) { * @param metricBuilder the string builder to append the metrics */ private void getBaseMetrics(StringBuilder metricBuilder) { - long totalRequestsThrottled = getMetricValue(NUMBER_OF_NETWORK_FAILED_REQUESTS) + long totalRequestsThrottled = + getMetricValue(NUMBER_OF_NETWORK_FAILED_REQUESTS) + getMetricValue(NUMBER_OF_IOPS_THROTTLED_REQUESTS) + getMetricValue(NUMBER_OF_OTHER_THROTTLED_REQUESTS) + getMetricValue(NUMBER_OF_BANDWIDTH_THROTTLED_REQUESTS); - metricBuilder.append(BANDWIDTH_THROTTLED_REQUESTS) - .append(getMetricValue(NUMBER_OF_BANDWIDTH_THROTTLED_REQUESTS)) - .append(IOPS_THROTTLED_REQUESTS) - .append(getMetricValue(NUMBER_OF_IOPS_THROTTLED_REQUESTS)) 
-        .append(OTHER_THROTTLED_REQUESTS)
-        .append(getMetricValue(NUMBER_OF_OTHER_THROTTLED_REQUESTS))
-        .append(PERCENTAGE_THROTTLED_REQUESTS)
-        .append(formatPercent(totalRequestsThrottled/ (double) getMetricValue(TOTAL_NUMBER_OF_REQUESTS), 3))
-        .append(NETWORK_ERROR_REQUESTS)
-        .append(getMetricValue(NUMBER_OF_NETWORK_FAILED_REQUESTS))
-        .append(SUCCESS_REQUESTS_WITHOUT_RETRY)
-        .append(getMetricValue(NUMBER_OF_REQUESTS_SUCCEEDED_WITHOUT_RETRYING))
-        .append(FAILED_REQUESTS)
-        .append(getMetricValue(NUMBER_OF_REQUESTS_FAILED))
-        .append(TOTAL_REQUESTS_COUNT)
-        .append(getMetricValue(TOTAL_NUMBER_OF_REQUESTS))
-        .append(MAX_RETRY)
-        .append(getMetricValue(MAX_RETRY_COUNT));
+    appendIfPositive(metricBuilder, BANDWIDTH_THROTTLED_REQUESTS,
+        getMetricValue(NUMBER_OF_BANDWIDTH_THROTTLED_REQUESTS));
+
+    appendIfPositive(metricBuilder, IOPS_THROTTLED_REQUESTS,
+        getMetricValue(NUMBER_OF_IOPS_THROTTLED_REQUESTS));
+
+    appendIfPositive(metricBuilder, OTHER_THROTTLED_REQUESTS,
+        getMetricValue(NUMBER_OF_OTHER_THROTTLED_REQUESTS));
+
+    // Append the throttling percentage only when at least one request
+    // was actually throttled.
+    if (totalRequestsThrottled > 0) {
+      appendAlways(metricBuilder, PERCENTAGE_THROTTLED_REQUESTS,
+          formatPercent(totalRequestsThrottled / (double) getMetricValue(
+              TOTAL_NUMBER_OF_REQUESTS), 3));
+    }
+
+    appendIfPositive(metricBuilder, NETWORK_ERROR_REQUESTS,
+        getMetricValue(NUMBER_OF_NETWORK_FAILED_REQUESTS));
+
+    appendIfPositive(metricBuilder, SUCCESS_REQUESTS_WITHOUT_RETRY,
+        getMetricValue(NUMBER_OF_REQUESTS_SUCCEEDED_WITHOUT_RETRYING));
+
+    appendIfPositive(metricBuilder, FAILED_REQUESTS,
+        getMetricValue(NUMBER_OF_REQUESTS_FAILED));
+
+    appendIfPositive(metricBuilder, TOTAL_REQUESTS_COUNT,
+        getMetricValue(TOTAL_NUMBER_OF_REQUESTS));
+
+    appendIfPositive(metricBuilder, MAX_RETRY,
+        getMetricValue(MAX_RETRY_COUNT));
   }
 
   /**
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java
index 43171aef07316..b4122241347eb 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java
@@ -188,9 +188,10 @@ public AbfsBlobClient(final URL baseUrl,
       final AccessTokenProvider tokenProvider,
       final SASTokenProvider sasTokenProvider,
       final EncryptionContextProvider encryptionContextProvider,
-      final AbfsClientContext abfsClientContext) throws IOException {
+      final AbfsClientContext abfsClientContext,
+      final String fileSystemId) throws IOException {
     super(baseUrl, sharedKeyCredentials, abfsConfiguration, tokenProvider, sasTokenProvider,
-        encryptionContextProvider, abfsClientContext, AbfsServiceType.BLOB);
+        encryptionContextProvider, abfsClientContext, fileSystemId, AbfsServiceType.BLOB);
     this.azureAtomicRenameDirSet = new HashSet<>(Arrays.asList(
         abfsConfiguration.getAzureAtomicRenameDirs()
             .split(AbfsHttpConstants.COMMA)));
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java
index 18e8183754d5c..bc0763f802ee6 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java
@@ -25,12 +25,10 @@ import
java.io.UnsupportedEncodingException; import java.lang.reflect.InvocationTargetException; import java.net.HttpURLConnection; -import java.net.InetAddress; import java.net.MalformedURLException; import java.net.URI; import java.net.URL; import java.net.URLEncoder; -import java.net.UnknownHostException; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.util.ArrayList; @@ -41,6 +39,8 @@ import java.util.Timer; import java.util.TimerTask; import java.util.concurrent.Callable; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.ThreadFactory; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; @@ -88,6 +88,7 @@ import org.apache.hadoop.fs.azurebfs.utils.EncryptionType; import org.apache.hadoop.fs.azurebfs.utils.MetricFormat; import org.apache.hadoop.fs.azurebfs.utils.TracingContext; +import org.apache.hadoop.fs.azurebfs.utils.TracingHeaderFormat; import org.apache.hadoop.fs.azurebfs.utils.UriUtils; import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.fs.permission.FsPermission; @@ -111,6 +112,7 @@ import static org.apache.hadoop.fs.azurebfs.AzureBlobFileSystemStore.extractEtagHeader; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.APN_VERSION; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.CLIENT_VERSION; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.COLON; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.DEFAULT_TIMEOUT; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EMPTY_STRING; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.FILESYSTEM; @@ -149,6 +151,7 @@ import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_VERSION; import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_RESOURCE; import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_TIMEOUT; +import static org.apache.hadoop.fs.azurebfs.enums.AbfsBackoffMetricsEnum.TOTAL_NUMBER_OF_REQUESTS; import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.CONNECTION_TIMEOUT_ABBREVIATION; import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.TAIL_LATENCY_REQUEST_TIMEOUT_ABBREVIATION; @@ -183,9 +186,10 @@ public abstract class AbfsClient implements Closeable { private SASTokenProvider sasTokenProvider; private final AbfsCounters abfsCounters; private Timer timer; - private final String abfsMetricUrl; - private boolean isMetricCollectionEnabled = false; - private final MetricFormat metricFormat; + private URL metricUrl; + private final AtomicBoolean isMetricCollectionEnabled + = new AtomicBoolean(false); + private MetricFormat metricFormat; private final AtomicBoolean isMetricCollectionStopped; private final int metricAnalysisPeriod; private final int metricIdlePeriod; @@ -198,13 +202,25 @@ public abstract class AbfsClient implements Closeable { private boolean renameResilience; private TimerTask runningTimerTask; - private boolean isSendMetricCall; private SharedKeyCredentials metricSharedkeyCredentials = null; + /** + * True if metric account name and key are different from storage account. + * False otherwise. 
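The fs.azure.max.metrics.calls.per.second cap is enforced by the new utils/SimpleRateLimiter.java, whose body is not part of this excerpt. A fixed-window limiter along the following lines would satisfy that contract; everything below, including the class and method names, is an assumption rather than the actual implementation.

    // Hypothetical sketch of a per-second rate limiter like the
    // SimpleRateLimiter this patch adds (real file not shown here).
    public class RateLimiterSketch {
      private final int maxPerSecond;
      private long windowStartMillis = System.currentTimeMillis();
      private int permitsUsed = 0;

      public RateLimiterSketch(int maxPerSecond) {
        this.maxPerSecond = maxPerSecond;
      }

      // Returns true if the caller may proceed within the current
      // one-second window; at most maxPerSecond callers succeed per window.
      public synchronized boolean tryAcquire() {
        long now = System.currentTimeMillis();
        if (now - windowStartMillis >= 1000L) {
          windowStartMillis = now;  // start a new one-second window
          permitsUsed = 0;
        }
        if (permitsUsed < maxPerSecond) {
          permitsUsed++;
          return true;
        }
        return false;
      }
    }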
+ */ + private boolean hasSeparateMetricAccount = false; + private KeepAliveCache keepAliveCache; private AbfsApacheHttpClient abfsApacheHttpClient; + // Scheduler to emit aggregated metric based on time + private ScheduledExecutorService metricsEmitScheduler = null; + + private final String fileSystemId; + + private final AggregateMetricsManager aggregateMetricsManager; + private AbfsServiceType abfsServiceType; /** @@ -217,11 +233,13 @@ private AbfsClient(final URL baseUrl, final AbfsConfiguration abfsConfiguration, final EncryptionContextProvider encryptionContextProvider, final AbfsClientContext abfsClientContext, + final String fileSystemId, final AbfsServiceType abfsServiceType) throws IOException { this.baseUrl = baseUrl; this.sharedKeyCredentials = sharedKeyCredentials; String baseUrlString = baseUrl.toString(); - this.filesystem = baseUrlString.substring(baseUrlString.lastIndexOf(FORWARD_SLASH) + 1); + int indexLastForwardSlash = baseUrlString.lastIndexOf(FORWARD_SLASH); + this.filesystem = baseUrlString.substring(indexLastForwardSlash + 1); this.abfsConfiguration = abfsConfiguration; this.exponentialRetryPolicy = abfsClientContext.getExponentialRetryPolicy(); this.staticRetryPolicy = abfsClientContext.getStaticRetryPolicy(); @@ -231,6 +249,10 @@ private AbfsClient(final URL baseUrl, this.intercept = AbfsThrottlingInterceptFactory.getInstance(accountName, abfsConfiguration); this.tailLatencyTracker = AbfsTailLatencyTrackerFactory.getInstance(accountName, abfsConfiguration); this.renameResilience = abfsConfiguration.getRenameResilience(); + this.fileSystemId = fileSystemId; + this.aggregateMetricsManager = AggregateMetricsManager.get( + abfsConfiguration.getMetricEmitIntervalInMins(), + abfsConfiguration.getMaxMetricsCallsPerSecond()); this.abfsServiceType = abfsServiceType; if (encryptionContextProvider != null) { @@ -281,42 +303,84 @@ private AbfsClient(final URL baseUrl, new ThreadFactoryBuilder().setNameFormat("AbfsClient Lease Ops").setDaemon(true).build(); this.executorService = MoreExecutors.listeningDecorator( HadoopExecutors.newScheduledThreadPool(this.abfsConfiguration.getNumLeaseThreads(), tf)); - this.metricFormat = abfsConfiguration.getMetricFormat(); + this.isMetricCollectionEnabled.set(abfsConfiguration.isMetricsCollectionEnabled()); this.isMetricCollectionStopped = new AtomicBoolean(false); this.metricAnalysisPeriod = abfsConfiguration.getMetricAnalysisTimeout(); this.metricIdlePeriod = abfsConfiguration.getMetricIdleTimeout(); - if (StringUtils.isNotEmpty(metricFormat.toString())) { - String metricAccountName = abfsConfiguration.getMetricAccount(); - String metricAccountKey = abfsConfiguration.getMetricAccountKey(); - if (StringUtils.isNotEmpty(metricAccountName) && StringUtils.isNotEmpty(metricAccountKey)) { - isMetricCollectionEnabled = true; + if (isMetricCollectionEnabled()) { + try { + String metricAccountName = abfsConfiguration.getMetricAccount(); + String metricAccountKey = abfsConfiguration.getMetricAccountKey(); + this.metricFormat = abfsConfiguration.getMetricFormat(); abfsCounters.initializeMetrics(metricFormat); - int dotIndex = metricAccountName.indexOf(AbfsHttpConstants.DOT); - if (dotIndex <= 0) { - throw new InvalidUriException( - metricAccountName + " - account name is not fully qualified."); + if (isNotEmpty(metricAccountName) && isNotEmpty( + metricAccountKey)) { + int dotIndex = metricAccountName.indexOf(AbfsHttpConstants.DOT); + if (dotIndex <= 0) { + throw new InvalidUriException( + metricAccountName + " - account name is not fully 
qualified."); + } + try { + metricSharedkeyCredentials = new SharedKeyCredentials( + metricAccountName.substring(0, dotIndex), + metricAccountKey); + hasSeparateMetricAccount = true; + setMetricsUrl(metricAccountName.startsWith(HTTPS_SCHEME) ? + metricAccountName : HTTPS_SCHEME + COLON + + FORWARD_SLASH + FORWARD_SLASH + metricAccountName); + } catch (IllegalArgumentException e) { + throw new IOException( + "Exception while initializing metric credentials ", e); + } + } else { + setMetricsUrl(baseUrlString.substring(0, indexLastForwardSlash + 1)); } - try { - metricSharedkeyCredentials = new SharedKeyCredentials( - metricAccountName.substring(0, dotIndex), - metricAccountKey); - } catch (IllegalArgumentException e) { - throw new IOException("Exception while initializing metric credentials ", e); + + // register the client to Aggregated Metrics Manager + this.aggregateMetricsManager.registerClient(accountName, this); + + // Metrics emitter scheduler + this.metricsEmitScheduler + = Executors.newSingleThreadScheduledExecutor(); + // run every 1 minute to check the metrics count + this.metricsEmitScheduler.scheduleAtFixedRate( + () -> { + if (getAbfsCounters().getAbfsBackoffMetrics() + .getMetricValue(TOTAL_NUMBER_OF_REQUESTS) + >= getAbfsConfiguration().getMetricEmitThreshold()) { + emitCollectedMetrics(); + } + }, + abfsConfiguration.getMetricsEmitThresholdIntervalInSecs(), + abfsConfiguration.getMetricsEmitThresholdIntervalInSecs(), + TimeUnit.SECONDS); + + // run every metricInterval minutes + this.metricsEmitScheduler.scheduleAtFixedRate( + this::emitCollectedMetrics, + abfsConfiguration.getMetricEmitIntervalInMins(), + abfsConfiguration.getMetricEmitIntervalInMins(), + TimeUnit.MINUTES); + + // emit metrics based on idea time + if (abfsConfiguration.shouldEmitMetricsOnIdleTime()) { + this.timer = new Timer( + ABFS_CLIENT_TIMER_THREAD_NAME, true); + timer.schedule(new TimerTaskImpl(), + metricIdlePeriod, + metricIdlePeriod); } + } catch (Exception e) { + LOG.error("Metrics disabled. Failed to initialize metrics for {}", + baseUrl, e); + this.isMetricCollectionEnabled.set(false); } } - if (isMetricCollectionEnabled) { - this.timer = new Timer( - ABFS_CLIENT_TIMER_THREAD_NAME, true); - timer.schedule(new TimerTaskImpl(), - metricIdlePeriod, - metricIdlePeriod); - } + // Initialize write thread pool metrics if dynamic write thread pool scaling is enabled. if (abfsConfiguration.isDynamicWriteThreadPoolEnablement()) { abfsCounters.initializeWriteResourceUtilizationMetrics(); } - this.abfsMetricUrl = abfsConfiguration.getMetricUri(); // Initialize read thread pool metrics if ReadAheadV2 and its dynamic scaling feature are enabled. 
    if (abfsConfiguration.isReadAheadV2Enabled() && abfsConfiguration.isReadAheadV2DynamicScalingEnabled()) {
       abfsCounters.initializeReadResourceUtilizationMetrics();
     }
@@ -329,7 +393,7 @@ private AbfsClient(final URL baseUrl,
           Configuration.class).newInstance(abfsConfiguration.getRawConfiguration());
     } catch (IllegalAccessException | InstantiationException | IllegalArgumentException | InvocationTargetException | NoSuchMethodException e) {
-      LOG.error("IdentityTransformer Init Falied", e);
+      LOG.error("IdentityTransformer Init Failed", e);
       throw new IOException(e);
     }
     LOG.trace("IdentityTransformer init complete");
@@ -372,19 +436,26 @@ public AbfsClient(final URL baseUrl,
       final SASTokenProvider sasTokenProvider,
       final EncryptionContextProvider encryptionContextProvider,
       final AbfsClientContext abfsClientContext,
-      final AbfsServiceType abfsServiceType)
+      final String fileSystemId, final AbfsServiceType abfsServiceType)
       throws IOException {
     this(baseUrl, sharedKeyCredentials, abfsConfiguration,
-        encryptionContextProvider, abfsClientContext, abfsServiceType);
+        encryptionContextProvider, abfsClientContext, fileSystemId, abfsServiceType);
     this.sasTokenProvider = sasTokenProvider;
     this.tokenProvider = tokenProvider;
   }
 
   @Override
   public void close() throws IOException {
-    if (isMetricCollectionEnabled && runningTimerTask != null) {
-      runningTimerTask.cancel();
-      timer.cancel();
+    if (isMetricCollectionEnabled()) {
+      if (runningTimerTask != null) {
+        runningTimerTask.cancel();
+        timer.cancel();
+      }
+      if (metricsEmitScheduler != null && !metricsEmitScheduler.isShutdown()) {
+        metricsEmitScheduler.shutdownNow();
+      }
+      emitCollectedMetrics();
+      this.aggregateMetricsManager.deregisterClient(accountName, this);
     }
     if (keepAliveCache != null) {
       keepAliveCache.close();
@@ -462,6 +533,13 @@ AbfsThrottlingIntercept getIntercept() {
     return intercept;
   }
 
+  /**
+   * @return true if metric account name and key are different from storage account.
+   */
+  public boolean hasSeparateMetricAccount() {
+    return hasSeparateMetricAccount;
+  }
+
   /**
    * Create request headers for Rest Operation using the current API version.
    * @return default request headers
@@ -638,8 +716,8 @@ public abstract AbfsRestOperation deleteFilesystem(TracingContext tracingContext
    * the path.
    * @param contextEncryptionAdapter: object that contains the encryptionContext and
    * encryptionKey created from the developer provided implementation of
-   * {@link org.apache.hadoop.fs.azurebfs.extensions.EncryptionContextProvider}
-   * @param tracingContext: Object of {@link org.apache.hadoop.fs.azurebfs.utils.TracingContext}
+   * {@link EncryptionContextProvider}
+   * @param tracingContext: Object of {@link TracingContext}
    * correlating to the current fs.create() request.
    * @return object of {@link AbfsRestOperation} which contain all the information
    * about the communication with the server. The information is in
@@ -1581,18 +1659,15 @@ protected AccessTokenProvider getTokenProvider() {
    *
-   * @return A TracingContext object configured for metric tracking.
+   * @return the aggregated metrics payload, or null if nothing was collected.
   */
-  private TracingContext getMetricTracingContext() {
-    String hostName;
-    try {
-      hostName = InetAddress.getLocalHost().getHostName();
-    } catch (UnknownHostException e) {
-      hostName = "UnknownHost";
+  private synchronized String getMetricsData() {
+    String metrics = abfsCounters.toString();
+    if (StringUtils.isEmpty(metrics)) {
+      return null;
     }
-    return new TracingContext(TracingContext.validateClientCorrelationID(
-        abfsConfiguration.getClientCorrelationId()),
-        hostName, FSOperationType.GET_ATTR, true,
-        abfsConfiguration.getTracingHeaderFormat(),
-        null, abfsCounters.toString());
+    abfsCounters.initializeMetrics(metricFormat);
+    return TracingContext.validateClientCorrelationID(
+        abfsConfiguration.getClientCorrelationId()) + COLON + fileSystemId
+        + COLON + metrics;
   }
 
   /**
@@ -1604,7 +1679,7 @@
   boolean timerOrchestrator(TimerFunctionality timerFunctionality, TimerTask timerTask) {
     switch (timerFunctionality) {
       case RESUME:
-        if (isMetricCollectionEnabled && isMetricCollectionStopped.get()) {
+        if (isMetricCollectionEnabled() && isMetricCollectionStopped.get()) {
           synchronized (this) {
             if (isMetricCollectionStopped.get()) {
               resumeTimer();
@@ -1615,7 +1690,7 @@
       case SUSPEND:
         long now = System.currentTimeMillis();
         long lastExecutionTime = abfsCounters.getLastExecutionTime().get();
-        if (isMetricCollectionEnabled && (now - lastExecutionTime >= metricAnalysisPeriod)) {
+        if (isMetricCollectionEnabled() && (now - lastExecutionTime >= metricAnalysisPeriod)) {
           synchronized (this) {
             if (!isMetricCollectionStopped.get()) {
               timerTask.cancel();
@@ -1643,38 +1718,63 @@ private void resumeTimer() {
    * Initiates a metric call to the Azure Blob FileSystem (ABFS) for retrieving file system properties.
    * This method performs a HEAD request to the specified metric URL, using default headers and query parameters.
    *
-   * @param tracingContext The tracing context to be used for capturing tracing information.
+   * @param metricsData the aggregated metrics payload to send with the metric call.
    * @throws IOException throws IOException.
   */
-  public void getMetricCall(TracingContext tracingContext) throws IOException {
-    this.isSendMetricCall = true;
+  public void getMetricCall(String metricsData) throws IOException {
+    if (StringUtils.isEmpty(metricsData)) {
+      return;
+    }
     final List<AbfsHttpHeader> requestHeaders = createDefaultHeaders();
-    final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder();
+    final AbfsUriQueryBuilder abfsUriQueryBuilder
+        = createDefaultUriQueryBuilder();
     abfsUriQueryBuilder.addQuery(QUERY_PARAM_RESOURCE, FILESYSTEM);
 
     // Construct the URL for the metric call
     // In case of blob storage, the URL is changed to DFS URL
-    final URL url = UriUtils.changeUrlFromBlobToDfs(
-        createRequestUrl(new URL(abfsMetricUrl),
-            EMPTY_STRING, abfsUriQueryBuilder.toString()));
+    final URL url = createRequestUrl(getMetricsUrl(),
+        EMPTY_STRING, abfsUriQueryBuilder.toString());
     final AbfsRestOperation op = getAbfsRestOperation(
-        AbfsRestOperationType.GetFileSystemProperties,
-        HTTP_METHOD_HEAD,
-        url,
-        requestHeaders);
+        AbfsRestOperationType.GetFileSystemProperties,
+        HTTP_METHOD_HEAD,
+        url,
+        requestHeaders);
+    TracingContext tracingContext = new TracingContext(
+        TracingContext.validateClientCorrelationID(
+            abfsConfiguration.getClientCorrelationId()),
+        fileSystemId, FSOperationType.GET_ATTR, true,
+        TracingHeaderFormat.AGGREGATED_METRICS_FORMAT,
+        null, metricsData);
     try {
       op.execute(tracingContext);
-    } finally {
-      this.isSendMetricCall = false;
+    } catch (AzureBlobFileSystemException e) {
+      // Ignore the exception and continue.
     }
   }
 
-  public boolean isSendMetricCall() {
-    return isSendMetricCall;
+  public boolean isMetricCollectionEnabled() {
+    return isMetricCollectionEnabled.get();
   }
 
-  public boolean isMetricCollectionEnabled() {
-    return isMetricCollectionEnabled;
+  /**
+   * Getter for metric URL.
+   *
+   * @return metricUrl
+   */
+  @VisibleForTesting
+  public URL getMetricsUrl() {
+    return metricUrl;
+  }
+
+  /**
+   * Setter for metric URL.
+   * Converts blob URL to dfs URL in case of blob storage account.
+   *
+   * @param urlString to be set as metricUrl.
+   * @throws IOException if URL is malformed.
+   */
+  private void setMetricsUrl(String urlString) throws IOException {
+    metricUrl = UriUtils.changeUrlFromBlobToDfs(new URL(urlString));
   }
 
   class TimerTaskImpl extends TimerTask {
     }
     @Override
     public void run() {
-      try {
-        if (timerOrchestrator(TimerFunctionality.SUSPEND, this)) {
-          try {
-            getMetricCall(getMetricTracingContext());
-          } finally {
-            abfsCounters.initializeMetrics(metricFormat);
-          }
-        }
-      } catch (IOException e) {
+      if (timerOrchestrator(TimerFunctionality.SUSPEND, this)) {
+        emitCollectedMetrics();
       }
     }
   }
 
+  /**
+   * Hands the currently collected metrics off to the AggregateMetricsManager.
+   * If metric collection is enabled, the aggregated metrics snapshot for this
+   * client is recorded against the account; the manager is then responsible
+   * for batching and emitting the data. Taking the snapshot through
+   * getMetricsData() also re-initializes the metrics in the AbfsCounters
+   * instance, resetting the counters for the next window.
+   */
+  public void emitCollectedMetrics() {
+    if (!isMetricCollectionEnabled()) {
+      return;
+    }
+    this.aggregateMetricsManager.recordMetric(accountName, getMetricsData());
+  }
+
   /**
    * Creates an AbfsRestOperation with additional parameters for buffer and SAS token.
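emitCollectedMetrics() above delegates to AggregateMetricsManager, a class added elsewhere in this patch (see the diffstat). Only its call surface is visible in this excerpt (get, registerClient, deregisterClient, recordMetric), so the sketch below is merely a plausible shape for that surface, with the batching and rate-limited flush policy stubbed out; all internals are assumptions.

    import java.util.List;
    import java.util.Map;
    import java.util.concurrent.ConcurrentHashMap;
    import java.util.concurrent.CopyOnWriteArrayList;

    public final class AggregateMetricsManagerSketch {
      private static volatile AggregateMetricsManagerSketch instance;
      private final Map<String, List<String>> pendingByAccount = new ConcurrentHashMap<>();
      private final Map<String, List<Object>> clientsByAccount = new ConcurrentHashMap<>();

      // Lazily created process-wide singleton, matching the get(...) call above.
      public static AggregateMetricsManagerSketch get(long intervalMins, int maxCallsPerSec) {
        if (instance == null) {
          synchronized (AggregateMetricsManagerSketch.class) {
            if (instance == null) {
              instance = new AggregateMetricsManagerSketch();
            }
          }
        }
        return instance;
      }

      public void registerClient(String accountName, Object client) {
        clientsByAccount.computeIfAbsent(accountName,
            k -> new CopyOnWriteArrayList<>()).add(client);
      }

      public void deregisterClient(String accountName, Object client) {
        clientsByAccount.getOrDefault(accountName,
            new CopyOnWriteArrayList<>()).remove(client);
      }

      public void recordMetric(String accountName, String metricsData) {
        if (metricsData == null) {
          return;
        }
        pendingByAccount.computeIfAbsent(accountName,
            k -> new CopyOnWriteArrayList<>()).add(metricsData);
        // A real implementation would batch these and emit them through one
        // registered client's getMetricCall(...), rate limited per second.
      }
    }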
* @@ -1792,6 +1900,11 @@ protected Timer getTimer() { return timer; } + @VisibleForTesting + ScheduledExecutorService getMetricsEmitScheduler() { + return metricsEmitScheduler; + } + protected String getUserAgent() { return userAgent; } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientHandler.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientHandler.java index 393811c256bdd..ea0a879305f99 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientHandler.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientHandler.java @@ -69,17 +69,18 @@ public AbfsClientHandler(final URL baseUrl, final AccessTokenProvider tokenProvider, final SASTokenProvider sasTokenProvider, final EncryptionContextProvider encryptionContextProvider, - final AbfsClientContext abfsClientContext) throws IOException { + final AbfsClientContext abfsClientContext, + final String fileSystemId) throws IOException { // This will initialize the default and ingress service types. // This is needed before creating the clients so that we can do cache warmup // only for default client. initServiceType(abfsConfiguration); this.dfsAbfsClient = createDfsClient(baseUrl, sharedKeyCredentials, abfsConfiguration, tokenProvider, sasTokenProvider, encryptionContextProvider, - abfsClientContext); + abfsClientContext, fileSystemId); this.blobAbfsClient = createBlobClient(baseUrl, sharedKeyCredentials, abfsConfiguration, tokenProvider, sasTokenProvider, encryptionContextProvider, - abfsClientContext); + abfsClientContext, fileSystemId); } /** @@ -154,7 +155,8 @@ private AbfsDfsClient createDfsClient(final URL baseUrl, final AccessTokenProvider tokenProvider, final SASTokenProvider sasTokenProvider, final EncryptionContextProvider encryptionContextProvider, - final AbfsClientContext abfsClientContext) throws IOException { + final AbfsClientContext abfsClientContext, + final String fileSystemId) throws IOException { URL dfsUrl = changeUrlFromBlobToDfs(baseUrl); LOG.debug( "Creating AbfsDfsClient with access token provider: %s and " @@ -162,7 +164,7 @@ private AbfsDfsClient createDfsClient(final URL baseUrl, tokenProvider, sasTokenProvider, dfsUrl); return new AbfsDfsClient(dfsUrl, creds, abfsConfiguration, tokenProvider, sasTokenProvider, encryptionContextProvider, - abfsClientContext); + abfsClientContext, fileSystemId); } /** @@ -184,7 +186,8 @@ private AbfsBlobClient createBlobClient(final URL baseUrl, final AccessTokenProvider tokenProvider, final SASTokenProvider sasTokenProvider, final EncryptionContextProvider encryptionContextProvider, - final AbfsClientContext abfsClientContext) throws IOException { + final AbfsClientContext abfsClientContext, + final String fileSystemId) throws IOException { URL blobUrl = changeUrlFromDfsToBlob(baseUrl); LOG.debug( "Creating AbfsBlobClient with access token provider: %s and " @@ -192,7 +195,7 @@ private AbfsBlobClient createBlobClient(final URL baseUrl, tokenProvider, sasTokenProvider, blobUrl); return new AbfsBlobClient(blobUrl, creds, abfsConfiguration, tokenProvider, sasTokenProvider, encryptionContextProvider, - abfsClientContext); + abfsClientContext, fileSystemId); } @Override diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java index 
cf2449ea91834..208ea1b0c0694 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java @@ -172,9 +172,10 @@ public AbfsDfsClient(final URL baseUrl, final AccessTokenProvider tokenProvider, final SASTokenProvider sasTokenProvider, final EncryptionContextProvider encryptionContextProvider, - final AbfsClientContext abfsClientContext) throws IOException { + final AbfsClientContext abfsClientContext, + final String fileSystemId) throws IOException { super(baseUrl, sharedKeyCredentials, abfsConfiguration, tokenProvider, sasTokenProvider, - encryptionContextProvider, abfsClientContext, AbfsServiceType.DFS); + encryptionContextProvider, abfsClientContext, fileSystemId, AbfsServiceType.DFS); } /** diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsReadFooterMetrics.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsReadFooterMetrics.java index d22f0ac4985dd..5a3f088f09ce1 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsReadFooterMetrics.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsReadFooterMetrics.java @@ -15,6 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package org.apache.hadoop.fs.azurebfs.services; import java.util.List; @@ -62,476 +63,538 @@ * This class is responsible for tracking and updating metrics related to reading footers in files. */ public class AbfsReadFooterMetrics extends AbstractAbfsStatisticsSource { - private static final Logger LOG = LoggerFactory.getLogger(AbfsReadFooterMetrics.class); - private static final String FOOTER_LENGTH = "20"; - private static final List FILE_TYPE_LIST = - Arrays.asList(FileType.values()); - private final Map fileTypeMetricsMap = - new ConcurrentHashMap<>(); + + private static final Logger LOG = LoggerFactory.getLogger( + AbfsReadFooterMetrics.class); + + private static final String FOOTER_LENGTH = "20"; + + private static final List FILE_TYPE_LIST = + Arrays.asList(FileType.values()); + + private Map fileTypeMetricsMap = + new ConcurrentHashMap<>(); + + /** + * Inner class to handle file type checks. + */ + private static final class FileTypeMetrics { + + private final AtomicBoolean collectMetrics; + + private final AtomicBoolean collectMetricsForNextRead; + + private final AtomicBoolean collectLenMetrics; + + private final AtomicLong readCount; + + private final AtomicLong offsetOfFirstRead; + + private FileType fileType = null; + + private String sizeReadByFirstRead; + + private String offsetDiffBetweenFirstAndSecondRead; /** - * Inner class to handle file type checks. + * Constructor to initialize the file type metrics. */ - private static final class FileTypeMetrics { - private final AtomicBoolean collectMetrics; - private final AtomicBoolean collectMetricsForNextRead; - private final AtomicBoolean collectLenMetrics; - private final AtomicLong readCount; - private final AtomicLong offsetOfFirstRead; - private FileType fileType = null; - private String sizeReadByFirstRead; - private String offsetDiffBetweenFirstAndSecondRead; - - /** - * Constructor to initialize the file type metrics. 
- */ - private FileTypeMetrics() { - collectMetrics = new AtomicBoolean(false); - collectMetricsForNextRead = new AtomicBoolean(false); - collectLenMetrics = new AtomicBoolean(false); - readCount = new AtomicLong(0); - offsetOfFirstRead = new AtomicLong(0); - } - - /** - * Updates the file type based on the metrics collected. - */ - private void updateFileType() { - if (fileType == null) { - fileType = collectMetrics.get() && readCount.get() >= 2 - && haveEqualValues(sizeReadByFirstRead) - && haveEqualValues(offsetDiffBetweenFirstAndSecondRead) ? PARQUET : NON_PARQUET; - } - } - - /** - * Checks if the given value has equal parts. - * - * @param value the value to check - * @return true if the value has equal parts, false otherwise - */ - private boolean haveEqualValues(String value) { - String[] parts = value.split("_"); - return parts.length == 2 - && parts[0].equals(parts[1]); - } - - /** - * Increments the read count. - */ - private void incrementReadCount() { - readCount.incrementAndGet(); - } - - /** - * Returns the read count. - * - * @return the read count - */ - private long getReadCount() { - return readCount.get(); - } - - /** - * Sets the collect metrics flag. - * - * @param collect the value to set - */ - private void setCollectMetrics(boolean collect) { - collectMetrics.set(collect); - } - - /** - * Returns the collect metrics flag. - * - * @return the collect metrics flag - */ - private boolean getCollectMetrics() { - return collectMetrics.get(); - } - - /** - * Sets the collect metrics for the next read flag. - * - * @param collect the value to set - */ - private void setCollectMetricsForNextRead(boolean collect) { - collectMetricsForNextRead.set(collect); - } - - /** - * Returns the collect metrics for the next read flag. - * - * @return the collect metrics for the next read flag - */ - private boolean getCollectMetricsForNextRead() { - return collectMetricsForNextRead.get(); - } - - /** - * Returns the collect length metrics flag. - * - * @return the collect length metrics flag - */ - private boolean getCollectLenMetrics() { - return collectLenMetrics.get(); - } - - /** - * Sets the collect length metrics flag. - * - * @param collect the value to set - */ - private void setCollectLenMetrics(boolean collect) { - collectLenMetrics.set(collect); - } - - /** - * Sets the offset of the first read. - * - * @param offset the value to set - */ - private void setOffsetOfFirstRead(long offset) { - offsetOfFirstRead.set(offset); - } - - /** - * Returns the offset of the first read. - * - * @return the offset of the first read - */ - private long getOffsetOfFirstRead() { - return offsetOfFirstRead.get(); - } - - /** - * Sets the size read by the first read. - * - * @param size the value to set - */ - private void setSizeReadByFirstRead(String size) { - sizeReadByFirstRead = size; - } - - /** - * Returns the size read by the first read. - * - * @return the size read by the first read - */ - private String getSizeReadByFirstRead() { - return sizeReadByFirstRead; - } - - /** - * Sets the offset difference between the first and second read. - * - * @param offsetDiff the value to set - */ - private void setOffsetDiffBetweenFirstAndSecondRead(String offsetDiff) { - offsetDiffBetweenFirstAndSecondRead = offsetDiff; - } - - /** - * Returns the offset difference between the first and second read. 
- * - * @return the offset difference between the first and second read - */ - private String getOffsetDiffBetweenFirstAndSecondRead() { - return offsetDiffBetweenFirstAndSecondRead; - } - - /** - * Returns the file type. - * - * @return the file type - */ - private FileType getFileType() { - return fileType; - } + private FileTypeMetrics() { + collectMetrics = new AtomicBoolean(false); + collectMetricsForNextRead = new AtomicBoolean(false); + collectLenMetrics = new AtomicBoolean(false); + readCount = new AtomicLong(0); + offsetOfFirstRead = new AtomicLong(0); } /** - * Constructor to initialize the IOStatisticsStore with counters and mean statistics. + * Updates the file type based on the metrics collected. */ - public AbfsReadFooterMetrics() { - IOStatisticsStore ioStatisticsStore = iostatisticsStore() - .withCounters(getMetricNames(TYPE_COUNTER)) - .withMeanStatistics(getMetricNames(TYPE_MEAN)) - .build(); - setIOStatistics(ioStatisticsStore); + private void updateFileType() { + if (fileType == null) { + fileType = collectMetrics.get() && readCount.get() >= 2 + && haveEqualValues(sizeReadByFirstRead) + && haveEqualValues(offsetDiffBetweenFirstAndSecondRead) + ? PARQUET + : NON_PARQUET; + } } /** - * Returns the metric names for a specific statistic type. + * Checks if the given value has equal parts. * - * @param type the statistic type - * @return the metric names + * @param value the value to check + * @return true if the value has equal parts, false otherwise */ - private String[] getMetricNames(StatisticTypeEnum type) { - return Arrays.stream(AbfsReadFooterMetricsEnum.values()) - .filter(readFooterMetricsEnum -> readFooterMetricsEnum.getStatisticType().equals(type)) - .flatMap(readFooterMetricsEnum -> - FILE.equals(readFooterMetricsEnum.getType()) - ? FILE_TYPE_LIST.stream().map(fileType -> - getMetricName(fileType, readFooterMetricsEnum)) - : Stream.of(readFooterMetricsEnum.getName())) - .toArray(String[]::new); + private boolean haveEqualValues(String value) { + String[] parts = value.split("_"); + return parts.length == 2 + && parts[0].equals(parts[1]); } /** - * Returns the metric name for a specific file type and metric. - * - * @param fileType the type of the file - * @param readFooterMetricsEnum the metric to get the name for - * @return the metric name + * Increments the read count. */ - private String getMetricName(FileType fileType, - AbfsReadFooterMetricsEnum readFooterMetricsEnum) { - if (fileType == null || readFooterMetricsEnum == null) { - LOG.error("File type or ABFS read footer metrics should not be null"); - return EMPTY_STRING; - } - return fileType + COLON + readFooterMetricsEnum.getName(); + private void incrementReadCount() { + readCount.incrementAndGet(); } /** - * Looks up the counter value for a specific metric. + * Returns the read count. * - * @param fileType the type of the file - * @param abfsReadFooterMetricsEnum the metric to look up - * @return the counter value + * @return the read count */ - private long getCounterMetricValue(FileType fileType, - AbfsReadFooterMetricsEnum abfsReadFooterMetricsEnum) { - return lookupCounterValue(getMetricName(fileType, abfsReadFooterMetricsEnum)); + private long getReadCount() { + return readCount.get(); } /** - * Looks up the mean statistic value for a specific metric. + * Sets the collect metrics flag. 
* - * @param fileType the type of the file - * @param abfsReadFooterMetricsEnum the metric to look up - * @return the mean statistic value + * @param collect the value to set */ - private String getMeanMetricValue(FileType fileType, - AbfsReadFooterMetricsEnum abfsReadFooterMetricsEnum) { - return format(DOUBLE_PRECISION_FORMAT, - lookupMeanStatistic(getMetricName(fileType, abfsReadFooterMetricsEnum))); + private void setCollectMetrics(boolean collect) { + collectMetrics.set(collect); } /** - * Increments the value of a specific metric. + * Returns the collect metrics flag. * - * @param fileType the type of the file - * @param abfsReadFooterMetricsEnum the metric to increment + * @return the collect metrics flag */ - public void incrementMetricValue(FileType fileType, - AbfsReadFooterMetricsEnum abfsReadFooterMetricsEnum) { - incCounterValue(getMetricName(fileType, abfsReadFooterMetricsEnum)); + private boolean getCollectMetrics() { + return collectMetrics.get(); } /** - * Adds a mean statistic value for a specific metric. + * Sets the collect metrics for the next read flag. * - * @param fileType the type of the file - * @param abfsReadFooterMetricsEnum the metric to update - * @param value the new value of the metric + * @param collect the value to set */ - public void addMeanMetricValue(FileType fileType, - AbfsReadFooterMetricsEnum abfsReadFooterMetricsEnum, - long value) { - addMeanStatistic(getMetricName(fileType, abfsReadFooterMetricsEnum), value); + private void setCollectMetricsForNextRead(boolean collect) { + collectMetricsForNextRead.set(collect); } /** - * Returns the total number of files. + * Returns the collect metrics for the next read flag. * - * @return the total number of files + * @return the collect metrics for the next read flag */ - public Long getTotalFiles() { - return getCounterMetricValue(PARQUET, TOTAL_FILES) + getCounterMetricValue(NON_PARQUET, TOTAL_FILES); + private boolean getCollectMetricsForNextRead() { + return collectMetricsForNextRead.get(); } /** - * Updates the map with a new file path identifier. + * Returns the collect length metrics flag. * - * @param filePathIdentifier the file path identifier + * @return the collect length metrics flag */ - public void updateMap(String filePathIdentifier) { - fileTypeMetricsMap.computeIfAbsent(filePathIdentifier, key -> new FileTypeMetrics()); + private boolean getCollectLenMetrics() { + return collectLenMetrics.get(); } /** - * Checks and updates the metrics for a given file read. + * Sets the collect length metrics flag. * - * @param filePathIdentifier the file path identifier - * @param len the length of the read - * @param contentLength the total content length of the file - * @param nextReadPos the position of the next read + * @param collect the value to set */ - public void updateReadMetrics(final String filePathIdentifier, - final int len, - final long contentLength, - final long nextReadPos) { - FileTypeMetrics fileTypeMetrics = fileTypeMetricsMap.computeIfAbsent(filePathIdentifier, key -> new FileTypeMetrics()); - if (fileTypeMetrics.getReadCount() == 0 || (fileTypeMetrics.getReadCount() >= 1 && fileTypeMetrics.getCollectMetrics())) { - updateMetrics(fileTypeMetrics, len, contentLength, nextReadPos); - } + private void setCollectLenMetrics(boolean collect) { + collectLenMetrics.set(collect); } /** - * Updates metrics for a specific file identified by filePathIdentifier. + * Sets the offset of the first read. * - * @param fileTypeMetrics File metadata to know file type. 
- * @param len The length of the read operation. - * @param contentLength The total content length of the file. - * @param nextReadPos The position of the next read operation. + * @param offset the value to set */ - private void updateMetrics(FileTypeMetrics fileTypeMetrics, - int len, - long contentLength, - long nextReadPos) { - fileTypeMetrics.incrementReadCount(); - - long readCount = fileTypeMetrics.getReadCount(); - - if (readCount == 1) { - handleFirstRead(fileTypeMetrics, nextReadPos, len, contentLength); - } else if (readCount == 2) { - handleSecondRead(fileTypeMetrics, nextReadPos, len, contentLength); - } else { - handleFurtherRead(fileTypeMetrics, len); - } + private void setOffsetOfFirstRead(long offset) { + offsetOfFirstRead.set(offset); } /** - * Handles the first read operation by checking if the current read position is near the end of the file. - * If it is, updates the {@link FileTypeMetrics} object to enable metrics collection and records the first read's - * offset and size. + * Returns the offset of the first read. * - * @param fileTypeMetrics The {@link FileTypeMetrics} object to update with metrics and read details. - * @param nextReadPos The position where the next read will start. - * @param len The length of the current read operation. - * @param contentLength The total length of the file content. + * @return the offset of the first read */ - private void handleFirstRead(FileTypeMetrics fileTypeMetrics, - long nextReadPos, - int len, - long contentLength) { - if (nextReadPos >= contentLength - (long) Integer.parseInt(FOOTER_LENGTH) * ONE_KB) { - fileTypeMetrics.setCollectMetrics(true); - fileTypeMetrics.setCollectMetricsForNextRead(true); - fileTypeMetrics.setOffsetOfFirstRead(nextReadPos); - fileTypeMetrics.setSizeReadByFirstRead(len + "_" + Math.abs(contentLength - nextReadPos)); - } + private long getOffsetOfFirstRead() { + return offsetOfFirstRead.get(); } /** - * Handles the second read operation by checking if metrics collection is enabled for the next read. - * If it is, calculates the offset difference between the first and second reads, updates the {@link FileTypeMetrics} - * object with this information, and sets the file type. Then, updates the metrics data. + * Sets the size read by the first read. * - * @param fileTypeMetrics The {@link FileTypeMetrics} object to update with metrics and read details. - * @param nextReadPos The position where the next read will start. - * @param len The length of the current read operation. - * @param contentLength The total length of the file content. + * @param size the value to set */ - private void handleSecondRead(FileTypeMetrics fileTypeMetrics, - long nextReadPos, - int len, - long contentLength) { - if (fileTypeMetrics.getCollectMetricsForNextRead()) { - long offsetDiff = Math.abs(nextReadPos - fileTypeMetrics.getOffsetOfFirstRead()); - fileTypeMetrics.setOffsetDiffBetweenFirstAndSecondRead(len + "_" + offsetDiff); - fileTypeMetrics.setCollectLenMetrics(true); - fileTypeMetrics.updateFileType(); - updateMetricsData(fileTypeMetrics, len, contentLength); - } + private void setSizeReadByFirstRead(String size) { + sizeReadByFirstRead = size; } /** - * Handles further read operations beyond the second read. If metrics collection is enabled and the file type is set, - * updates the read length requested and increments the read count for the specific file type. + * Returns the size read by the first read. * - * @param fileTypeMetrics The {@link FileTypeMetrics} object containing metrics and read details. 
- * @param len The length of the current read operation. + * @return the size read by the first read */ - private void handleFurtherRead(FileTypeMetrics fileTypeMetrics, int len) { - if (fileTypeMetrics.getCollectLenMetrics() && fileTypeMetrics.getFileType() != null) { - FileType fileType = fileTypeMetrics.getFileType(); - addMeanMetricValue(fileType, AVG_READ_LEN_REQUESTED, len); - } + private String getSizeReadByFirstRead() { + return sizeReadByFirstRead; } /** - * Updates the metrics data for a specific file identified by the {@link FileTypeMetrics} object. - * This method calculates and updates various metrics such as read length requested, file length, - * size read by the first read, and offset differences between reads. + * Sets the offset difference between the first and second read. * - * @param fileTypeMetrics The {@link FileTypeMetrics} object containing metrics and read details. - * @param len The length of the current read operation. - * @param contentLength The total length of the file content. + * @param offsetDiff the value to set */ - private void updateMetricsData(FileTypeMetrics fileTypeMetrics, - int len, - long contentLength) { - long sizeReadByFirstRead = Long.parseLong(fileTypeMetrics.getSizeReadByFirstRead().split("_")[0]); - long firstOffsetDiff = Long.parseLong(fileTypeMetrics.getSizeReadByFirstRead().split("_")[1]); - long secondOffsetDiff = Long.parseLong(fileTypeMetrics.getOffsetDiffBetweenFirstAndSecondRead().split("_")[1]); - FileType fileType = fileTypeMetrics.getFileType(); - - addMeanMetricValue(fileType, AVG_READ_LEN_REQUESTED, len); - addMeanMetricValue(fileType, AVG_READ_LEN_REQUESTED, sizeReadByFirstRead); - addMeanMetricValue(fileType, AVG_FILE_LENGTH, contentLength); - addMeanMetricValue(fileType, AVG_SIZE_READ_BY_FIRST_READ, sizeReadByFirstRead); - addMeanMetricValue(fileType, AVG_OFFSET_DIFF_BETWEEN_FIRST_AND_SECOND_READ, len); - addMeanMetricValue(fileType, AVG_FIRST_OFFSET_DIFF, firstOffsetDiff); - addMeanMetricValue(fileType, AVG_SECOND_OFFSET_DIFF, secondOffsetDiff); - incrementMetricValue(fileType, TOTAL_FILES); + private void setOffsetDiffBetweenFirstAndSecondRead(String offsetDiff) { + offsetDiffBetweenFirstAndSecondRead = offsetDiff; } /** - * Appends the metrics for a specific file type to the given metric builder. + * Returns the offset difference between the first and second read. 
* - * @param metricBuilder the metric builder to append the metrics to - * @param fileType the file type to append the metrics for + * @return the offset difference between the first and second read */ - private void appendMetrics(StringBuilder metricBuilder, FileType fileType) { - long totalFiles = getCounterMetricValue(fileType, TOTAL_FILES); - if (totalFiles <= 0) { - return; - } - - String sizeReadByFirstRead = getMeanMetricValue(fileType, AVG_SIZE_READ_BY_FIRST_READ); - String offsetDiffBetweenFirstAndSecondRead = getMeanMetricValue(fileType, AVG_OFFSET_DIFF_BETWEEN_FIRST_AND_SECOND_READ); - - if (NON_PARQUET.equals(fileType)) { - sizeReadByFirstRead += CHAR_UNDERSCORE + getMeanMetricValue(fileType, AVG_FIRST_OFFSET_DIFF); - offsetDiffBetweenFirstAndSecondRead += CHAR_UNDERSCORE + getMeanMetricValue(fileType, AVG_SECOND_OFFSET_DIFF); - } - - metricBuilder.append(CHAR_DOLLAR) - .append(fileType) - .append(FIRST_READ) - .append(sizeReadByFirstRead) - .append(SECOND_READ) - .append(offsetDiffBetweenFirstAndSecondRead) - .append(FILE_LENGTH) - .append(getMeanMetricValue(fileType, AVG_FILE_LENGTH)) - .append(READ_LENGTH) - .append(getMeanMetricValue(fileType, AVG_READ_LEN_REQUESTED)); + private String getOffsetDiffBetweenFirstAndSecondRead() { + return offsetDiffBetweenFirstAndSecondRead; } /** - * Returns the read footer metrics for all file types. + * Returns the file type. * - * @return the read footer metrics as a string + * @return the file type */ - @Override - public String toString() { - StringBuilder readFooterMetric = new StringBuilder(); - appendMetrics(readFooterMetric, PARQUET); - appendMetrics(readFooterMetric, NON_PARQUET); - return readFooterMetric.toString(); + private FileType getFileType() { + return fileType; + } + } + + /** + * Constructor to initialize the IOStatisticsStore with counters and mean statistics. + */ + public AbfsReadFooterMetrics() { + IOStatisticsStore ioStatisticsStore = iostatisticsStore() + .withCounters(getMetricNames(TYPE_COUNTER)) + .withMeanStatistics(getMetricNames(TYPE_MEAN)) + .build(); + setIOStatistics(ioStatisticsStore); + } + + /** + * Constructor to initialize the IOStatisticsStore with counters and mean statistics, + * and a map to track file type metrics. + * + * @param fileTypeMetricsMap the map to track file type metrics + */ + public AbfsReadFooterMetrics(Map<String, FileTypeMetrics> fileTypeMetricsMap) { + IOStatisticsStore ioStatisticsStore = iostatisticsStore() + .withCounters(getMetricNames(TYPE_COUNTER)) + .withMeanStatistics(getMetricNames(TYPE_MEAN)) + .build(); + setIOStatistics(ioStatisticsStore); + this.fileTypeMetricsMap = fileTypeMetricsMap; + } + + /** + * Returns the map of file type metrics. + * + * @return the map of file type metrics + */ + public Map<String, FileTypeMetrics> getFileTypeMetricsMap() { + return fileTypeMetricsMap; + } + + /** + * Returns the metric names for a specific statistic type. + * + * @param type the statistic type + * @return the metric names + */ + private String[] getMetricNames(StatisticTypeEnum type) { + return Arrays.stream(AbfsReadFooterMetricsEnum.values()) + .filter( + readFooterMetricsEnum -> readFooterMetricsEnum.getStatisticType() + .equals(type)) + .flatMap(readFooterMetricsEnum -> + FILE.equals(readFooterMetricsEnum.getType()) + ? FILE_TYPE_LIST.stream().map(fileType -> + getMetricName(fileType, readFooterMetricsEnum)) + : Stream.of(readFooterMetricsEnum.getName())) + .toArray(String[]::new); + } + + /** + * Returns the metric name for a specific file type and metric.
+ * + * @param fileType the type of the file + * @param readFooterMetricsEnum the metric to get the name for + * @return the metric name + */ + private String getMetricName(FileType fileType, + AbfsReadFooterMetricsEnum readFooterMetricsEnum) { + if (fileType == null || readFooterMetricsEnum == null) { + LOG.error("File type or ABFS read footer metrics should not be null"); + return EMPTY_STRING; + } + return fileType + COLON + readFooterMetricsEnum.getName(); + } + + /** + * Looks up the counter value for a specific metric. + * + * @param fileType the type of the file + * @param abfsReadFooterMetricsEnum the metric to look up + * @return the counter value + */ + private long getCounterMetricValue(FileType fileType, + AbfsReadFooterMetricsEnum abfsReadFooterMetricsEnum) { + return lookupCounterValue( + getMetricName(fileType, abfsReadFooterMetricsEnum)); + } + + /** + * Looks up the mean statistic value for a specific metric. + * + * @param fileType the type of the file + * @param abfsReadFooterMetricsEnum the metric to look up + * @return the mean statistic value + */ + private String getMeanMetricValue(FileType fileType, + AbfsReadFooterMetricsEnum abfsReadFooterMetricsEnum) { + return format(DOUBLE_PRECISION_FORMAT, + lookupMeanStatistic( + getMetricName(fileType, abfsReadFooterMetricsEnum))); + } + + /** + * Increments the value of a specific metric. + * + * @param fileType the type of the file + * @param abfsReadFooterMetricsEnum the metric to increment + */ + public void incrementMetricValue(FileType fileType, + AbfsReadFooterMetricsEnum abfsReadFooterMetricsEnum) { + incCounterValue(getMetricName(fileType, abfsReadFooterMetricsEnum)); + } + + /** + * Adds a mean statistic value for a specific metric. + * + * @param fileType the type of the file + * @param abfsReadFooterMetricsEnum the metric to update + * @param value the new value of the metric + */ + public void addMeanMetricValue(FileType fileType, + AbfsReadFooterMetricsEnum abfsReadFooterMetricsEnum, + long value) { + addMeanStatistic(getMetricName(fileType, abfsReadFooterMetricsEnum), value); + } + + /** + * Returns the total number of files. + * + * @return the total number of files + */ + public Long getTotalFiles() { + return getCounterMetricValue(PARQUET, TOTAL_FILES) + getCounterMetricValue( + NON_PARQUET, TOTAL_FILES); + } + + /** + * Updates the map with a new file path identifier. + * + * @param filePathIdentifier the file path identifier + */ + public void updateMap(String filePathIdentifier) { + fileTypeMetricsMap.computeIfAbsent(filePathIdentifier, + key -> new FileTypeMetrics()); + } + + /** + * Checks and updates the metrics for a given file read. + * + * @param filePathIdentifier the file path identifier + * @param len the length of the read + * @param contentLength the total content length of the file + * @param nextReadPos the position of the next read + */ + public void updateReadMetrics(final String filePathIdentifier, + final int len, + final long contentLength, + final long nextReadPos) { + FileTypeMetrics fileTypeMetrics = fileTypeMetricsMap.computeIfAbsent( + filePathIdentifier, key -> new FileTypeMetrics()); + if (fileTypeMetrics.getReadCount() == 0 || ( + fileTypeMetrics.getReadCount() >= 1 + && fileTypeMetrics.getCollectMetrics())) { + updateMetrics(fileTypeMetrics, len, contentLength, nextReadPos); + } else { + // No-op: reads on files that never matched the footer-read pattern are not tracked. + } + } + + /** + * Updates metrics for a specific file identified by filePathIdentifier. + * + * @param fileTypeMetrics File metadata to know file type.
+ * @param len The length of the read operation. + * @param contentLength The total content length of the file. + * @param nextReadPos The position of the next read operation. + */ + private void updateMetrics(FileTypeMetrics fileTypeMetrics, + int len, + long contentLength, + long nextReadPos) { + fileTypeMetrics.incrementReadCount(); + + long readCount = fileTypeMetrics.getReadCount(); + + if (readCount == 1) { + handleFirstRead(fileTypeMetrics, nextReadPos, len, contentLength); + } else if (readCount == 2) { + handleSecondRead(fileTypeMetrics, nextReadPos, len, contentLength); + } else { + handleFurtherRead(fileTypeMetrics, len); + } + } + + /** + * Handles the first read operation by checking if the current read position is near the end of the file. + * If it is, updates the {@link FileTypeMetrics} object to enable metrics collection and records the first read's + * offset and size. + * + * @param fileTypeMetrics The {@link FileTypeMetrics} object to update with metrics and read details. + * @param nextReadPos The position where the next read will start. + * @param len The length of the current read operation. + * @param contentLength The total length of the file content. + */ + private void handleFirstRead(FileTypeMetrics fileTypeMetrics, + long nextReadPos, + int len, + long contentLength) { + if (nextReadPos + >= contentLength - (long) Integer.parseInt(FOOTER_LENGTH) * ONE_KB) { + fileTypeMetrics.setCollectMetrics(true); + fileTypeMetrics.setCollectMetricsForNextRead(true); + fileTypeMetrics.setOffsetOfFirstRead(nextReadPos); + fileTypeMetrics.setSizeReadByFirstRead( + len + "_" + Math.abs(contentLength - nextReadPos)); + } + } + + /** + * Handles the second read operation by checking if metrics collection is enabled for the next read. + * If it is, calculates the offset difference between the first and second reads, updates the {@link FileTypeMetrics} + * object with this information, and sets the file type. Then, updates the metrics data. + * + * @param fileTypeMetrics The {@link FileTypeMetrics} object to update with metrics and read details. + * @param nextReadPos The position where the next read will start. + * @param len The length of the current read operation. + * @param contentLength The total length of the file content. + */ + private void handleSecondRead(FileTypeMetrics fileTypeMetrics, + long nextReadPos, + int len, + long contentLength) { + if (fileTypeMetrics.getCollectMetricsForNextRead()) { + long offsetDiff = Math.abs( + nextReadPos - fileTypeMetrics.getOffsetOfFirstRead()); + fileTypeMetrics.setOffsetDiffBetweenFirstAndSecondRead( + len + "_" + offsetDiff); + fileTypeMetrics.setCollectLenMetrics(true); + fileTypeMetrics.updateFileType(); + updateMetricsData(fileTypeMetrics, len, contentLength); } + } + + /** + * Handles further read operations beyond the second read. If metrics collection is enabled and the file type is set, + * updates the read length requested and increments the read count for the specific file type. + * + * @param fileTypeMetrics The {@link FileTypeMetrics} object containing metrics and read details. + * @param len The length of the current read operation. 
+ */ + private void handleFurtherRead(FileTypeMetrics fileTypeMetrics, int len) { + if (fileTypeMetrics.getCollectLenMetrics() + && fileTypeMetrics.getFileType() != null) { + FileType fileType = fileTypeMetrics.getFileType(); + addMeanMetricValue(fileType, AVG_READ_LEN_REQUESTED, len); + } + } + + /** + * Updates the metrics data for a specific file identified by the {@link FileTypeMetrics} object. + * This method calculates and updates various metrics such as read length requested, file length, + * size read by the first read, and offset differences between reads. + * + * @param fileTypeMetrics The {@link FileTypeMetrics} object containing metrics and read details. + * @param len The length of the current read operation. + * @param contentLength The total length of the file content. + */ + private void updateMetricsData(FileTypeMetrics fileTypeMetrics, + int len, + long contentLength) { + long sizeReadByFirstRead = Long.parseLong( + fileTypeMetrics.getSizeReadByFirstRead().split("_")[0]); + long firstOffsetDiff = Long.parseLong( + fileTypeMetrics.getSizeReadByFirstRead().split("_")[1]); + long secondOffsetDiff = Long.parseLong( + fileTypeMetrics.getOffsetDiffBetweenFirstAndSecondRead().split("_")[1]); + FileType fileType = fileTypeMetrics.getFileType(); + + addMeanMetricValue(fileType, AVG_READ_LEN_REQUESTED, len); + addMeanMetricValue(fileType, AVG_READ_LEN_REQUESTED, sizeReadByFirstRead); + addMeanMetricValue(fileType, AVG_FILE_LENGTH, contentLength); + addMeanMetricValue(fileType, AVG_SIZE_READ_BY_FIRST_READ, + sizeReadByFirstRead); + addMeanMetricValue(fileType, AVG_OFFSET_DIFF_BETWEEN_FIRST_AND_SECOND_READ, + len); + addMeanMetricValue(fileType, AVG_FIRST_OFFSET_DIFF, firstOffsetDiff); + addMeanMetricValue(fileType, AVG_SECOND_OFFSET_DIFF, secondOffsetDiff); + incrementMetricValue(fileType, TOTAL_FILES); + } + + /** + * Appends the metrics for a specific file type to the given metric builder. + * + * @param metricBuilder the metric builder to append the metrics to + * @param fileType the file type to append the metrics for + */ + private void appendMetrics(StringBuilder metricBuilder, FileType fileType) { + long totalFiles = getCounterMetricValue(fileType, TOTAL_FILES); + if (totalFiles <= 0) { + return; + } + + String sizeReadByFirstRead = getMeanMetricValue(fileType, + AVG_SIZE_READ_BY_FIRST_READ); + String offsetDiffBetweenFirstAndSecondRead = getMeanMetricValue(fileType, + AVG_OFFSET_DIFF_BETWEEN_FIRST_AND_SECOND_READ); + + if (NON_PARQUET.equals(fileType)) { + sizeReadByFirstRead += CHAR_UNDERSCORE + getMeanMetricValue(fileType, + AVG_FIRST_OFFSET_DIFF); + offsetDiffBetweenFirstAndSecondRead += CHAR_UNDERSCORE + + getMeanMetricValue(fileType, AVG_SECOND_OFFSET_DIFF); + } + + appendAlways(metricBuilder, CHAR_DOLLAR, fileType.toString()); + appendAlways(metricBuilder, FIRST_READ, sizeReadByFirstRead); + appendAlways(metricBuilder, SECOND_READ, + offsetDiffBetweenFirstAndSecondRead); + appendAlways(metricBuilder, FILE_LENGTH, + getMeanMetricValue(fileType, AVG_FILE_LENGTH)); + appendAlways(metricBuilder, READ_LENGTH, + getMeanMetricValue(fileType, AVG_READ_LEN_REQUESTED)); + } + + /** + * Returns the read footer metrics for all file types. 
+ * + * @return the read footer metrics as a string + */ + @Override + public String toString() { + StringBuilder readFooterMetric = new StringBuilder(); + appendMetrics(readFooterMetric, PARQUET); + appendMetrics(readFooterMetric, NON_PARQUET); + return readFooterMetric.toString(); + } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java index ff7300d280fdb..35d5fa078e776 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java @@ -110,7 +110,7 @@ public class AbfsRestOperation { private final int maxIoRetries; private AbfsHttpOperation result; private final AbfsCounters abfsCounters; - private AbfsBackoffMetrics abfsBackoffMetrics; + private final AbfsBackoffMetrics abfsBackoffMetrics; /** * This variable contains the reason of last API call within the same * AbfsRestOperation object. @@ -239,6 +239,8 @@ String getSasToken() { this.abfsCounters = client.getAbfsCounters(); if (abfsCounters != null) { this.abfsBackoffMetrics = abfsCounters.getAbfsBackoffMetrics(); + } else { + this.abfsBackoffMetrics = null; } this.maxIoRetries = abfsConfiguration.getMaxIoRetries(); this.intercept = client.getIntercept(); @@ -324,8 +326,7 @@ void completeExecute(TracingContext tracingContext) retryCount = 0; retryPolicy = client.getExponentialRetryPolicy(); LOG.debug("First execution of REST operation - {}", operationType); - long sleepDuration = 0L; - if (abfsBackoffMetrics != null) { + if (abfsBackoffMetrics != null && !tracingContext.isMetricCall()) { synchronized (this) { abfsBackoffMetrics.incrementMetricValue(TOTAL_NUMBER_OF_REQUESTS); } @@ -338,7 +339,7 @@ void completeExecute(TracingContext tracingContext) LOG.debug("Rest operation {} failed with failureReason: {}. Retrying with retryCount = {}, retryPolicy: {} and sleepInterval: {}", operationType, failureReason, retryCount, retryPolicy.getAbbreviation(), retryInterval); if (abfsBackoffMetrics != null) { - updateBackoffTimeMetrics(retryCount, sleepDuration); + updateBackoffTimeMetrics(retryCount, retryInterval); } Thread.sleep(retryInterval); } catch (InterruptedException ex) { @@ -412,7 +413,7 @@ private boolean executeHttpOperation(final int retryCount, incrementCounter(AbfsStatistic.CONNECTIONS_MADE, 1); tracingContext.constructHeader(httpOperation, failureReason, retryPolicy.getAbbreviation()); - signRequest(httpOperation, hasRequestBody ? bufferLength : 0); + signRequest(httpOperation, hasRequestBody ? 
bufferLength : 0, tracingContext.isMetricCall()); } catch (IOException e) { LOG.debug("Auth failure: {}, {}", method, url); @@ -580,8 +581,9 @@ private void registerApacheHttpClientIoException() { * @throws IOException failure */ @VisibleForTesting - public void signRequest(final AbfsHttpOperation httpOperation, int bytesToSign) throws IOException { - if (client.isSendMetricCall()) { + public void signRequest(final AbfsHttpOperation httpOperation, int bytesToSign, + boolean isMetricCall) throws IOException { + if (isMetricCall && client.hasSeparateMetricAccount()) { client.getMetricSharedkeyCredentials().signRequest(httpOperation, bytesToSign); } else { switch (client.getAuthType()) { diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbstractAbfsStatisticsSource.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbstractAbfsStatisticsSource.java index a8f69cf72e2ce..3eeb73b70964c 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbstractAbfsStatisticsSource.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbstractAbfsStatisticsSource.java @@ -141,6 +141,30 @@ protected double lookupMeanStatistic(String name) { return ioStatisticsStore.meanStatistics().get(name).mean(); } + /** + * Appends the label and value to the StringBuilder if the value is positive. + * + * @param sb the StringBuilder to append to + * @param label the label to append + * @param value the value to check and append + */ + protected void appendIfPositive(StringBuilder sb, String label, long value) { + if (value > 0) { + sb.append(label).append(value); + } + } + + /** + * Appends the label and value to the StringBuilder unconditionally. + * + * @param sb the StringBuilder to append to + * @param label the label to append + * @param value the value to append + */ + protected void appendAlways(StringBuilder sb, String label, String value) { + sb.append(label).append(value); + } + /** * Returns a string representation of the AbstractAbfsStatisticsSource. * diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AggregateMetricsManager.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AggregateMetricsManager.java new file mode 100644 index 0000000000000..28d27ae5922e3 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AggregateMetricsManager.java @@ -0,0 +1,179 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; + +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidConfigurationValueException; +import org.apache.hadoop.fs.azurebfs.utils.SimpleRateLimiter; + +/** + * AggregateMetricsManager manages metrics collection and dispatching + * for multiple AbfsClients across different accounts. + */ +public final class AggregateMetricsManager { + + /** + * Singleton instance of AggregateMetricsManager. + */ + private static volatile AggregateMetricsManager INSTANCE; + + // Map of account name to MetricsBucket. + private final ConcurrentHashMap<String, MetricsBucket> buckets = + new ConcurrentHashMap<>(); + + // Rate limiter to control the rate of dispatching metrics. + private final SimpleRateLimiter rateLimiter; + + // Scheduler for periodic dispatching of metrics. + private final ScheduledExecutorService scheduler; + + // Private constructor to enforce singleton pattern. + private AggregateMetricsManager(final long dispatchIntervalInMins, + final int permitsPerSecond) throws InvalidConfigurationValueException { + + if (dispatchIntervalInMins <= 0) { + throw new InvalidConfigurationValueException( + "dispatchIntervalInMins must be > 0"); + } + + if (permitsPerSecond <= 0) { + throw new InvalidConfigurationValueException( + "permitsPerSecond must be > 0"); + } + + this.rateLimiter = new SimpleRateLimiter(permitsPerSecond); + + // Initialize scheduler for periodic dispatching of metrics. + this.scheduler = Executors.newSingleThreadScheduledExecutor(r -> { + Thread t = new Thread(r, "ABFS-Aggregated-Metrics-Dispatcher"); + t.setDaemon(true); + return t; + }); + + // Schedule periodic dispatching of metrics. + this.scheduler.scheduleAtFixedRate( + this::dispatchMetrics, + dispatchIntervalInMins, + dispatchIntervalInMins, + TimeUnit.MINUTES); + + // Add shutdown hook to dispatch remaining metrics on JVM shutdown. + Runtime.getRuntime().addShutdownHook(new Thread(() -> { + dispatchMetrics(); + scheduler.shutdown(); + })); + } + + /** + * Get the singleton instance of AggregateMetricsManager. + * + * @param dispatchIntervalInMins Interval in minutes for dispatching metrics. + * @param permitsPerSecond Rate limit for dispatching metrics. + * @return Singleton instance of AggregateMetricsManager. + */ + public static AggregateMetricsManager get(final long dispatchIntervalInMins, + final int permitsPerSecond) { + + AggregateMetricsManager instance = INSTANCE; + if (instance != null) { + return instance; + } + + synchronized (AggregateMetricsManager.class) { + if (INSTANCE == null) { + try { + INSTANCE = new AggregateMetricsManager( + dispatchIntervalInMins, permitsPerSecond); + } catch (InvalidConfigurationValueException e) { + throw new RuntimeException( + "Failed to initialize AggregateMetricsManager", e); + } + } + return INSTANCE; + } + } + + /** + * Register an AbfsClient with the manager. + * @param account Account name. + * @param abfsClient AbfsClient instance. + */ + public void registerClient(String account, AbfsClient abfsClient) { + if (StringUtils.isEmpty(account) || abfsClient == null) { + return; + } + + buckets.computeIfAbsent(account, + key -> new MetricsBucket(rateLimiter)) + .registerClient(abfsClient); + } + + /** + * Deregister an AbfsClient from the manager. + * @param account Account name.
+ * @param abfsClient AbfsClient instance. + * @return true if the client was deregistered, false otherwise. + */ + public boolean deregisterClient(String account, AbfsClient abfsClient) { + if (StringUtils.isEmpty(account) || abfsClient == null) { + return false; + } + + MetricsBucket bucket = buckets.get(account); + if (bucket == null) { + return false; + } + + boolean isRemoved = bucket.deregisterClient(abfsClient); + + if (bucket.isEmpty()) { + buckets.remove(account, bucket); + } + return isRemoved; + } + + /** + * Record metrics data for a specific account. + * @param accountName Account name. + * @param metricsData Metrics data to record. + */ + public void recordMetric(String accountName, String metricsData) { + if (StringUtils.isEmpty(accountName) + || StringUtils.isEmpty(metricsData)) { + return; + } + + MetricsBucket bucket = buckets.get(accountName); + if (bucket == null) { + return; + } + + bucket.addRequest(metricsData); + } + + // Dispatch metrics for all buckets. + private void dispatchMetrics() { + buckets.values().forEach(MetricsBucket::drainAndSendIfReady); + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/KeepAliveCache.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/KeepAliveCache.java index 1ad1ba899dfd6..09dadc29ea979 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/KeepAliveCache.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/KeepAliveCache.java @@ -32,6 +32,7 @@ import org.apache.hadoop.fs.ClosedIOException; import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; import org.apache.http.HttpClientConnection; +import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.KEEP_ALIVE_CACHE_CLOSED; @@ -123,6 +124,10 @@ class KeepAliveCache extends LinkedBlockingDeque<HttpClientConnection> return thread; }); } + + PoolingHttpClientConnectionManager poolingHttpClientConnectionManager = new PoolingHttpClientConnectionManager(); + poolingHttpClientConnectionManager.setMaxTotal(maxCacheConnections); + poolingHttpClientConnectionManager.setDefaultMaxPerRoute(maxCacheConnections); } /** diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/MetricsBucket.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/MetricsBucket.java new file mode 100644 index 0000000000000..ad93dcd4b658a --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/MetricsBucket.java @@ -0,0 +1,178 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.concurrent.atomic.AtomicReference; + +import org.apache.hadoop.fs.azurebfs.utils.SimpleRateLimiter; + +/** + * MetricsBucket holds metrics for multiple AbfsClients and + * dispatches them in batches, respecting rate limits. + */ +final class MetricsBucket { + + // Rate limiter to control the rate of dispatching metrics. + private final SimpleRateLimiter rateLimiter; + + // Buffer to hold metrics before sending. + private final AtomicReference<ConcurrentLinkedQueue<String>> metricsBuffer = + new AtomicReference<>(new ConcurrentLinkedQueue<>()); + + // Set of registered AbfsClients.
+ private final Set<AbfsClient> clients = + ConcurrentHashMap.newKeySet(); + + // Maximum size of metrics header in characters. + private static final long MAX_HEADER_SIZE = 1024; + + // Constructor + MetricsBucket(SimpleRateLimiter rateLimiter) { + this.rateLimiter = rateLimiter; + } + + /** + * Register a new AbfsClient. + * @param client the AbfsClient to register + */ + public void registerClient(AbfsClient client) { + if (client != null) { + clients.add(client); + } + } + + /** + * Deregister an AbfsClient. If this is the last client, drain and send + * any remaining metrics. + * @param client the AbfsClient to deregister + * @return true if the client was deregistered, false otherwise + */ + public boolean deregisterClient(AbfsClient client) { + if (client == null) { + return false; + } + ConcurrentLinkedQueue<String> batchToSend = null; + boolean isLastClient = false; + + synchronized (this) { + if (!clients.contains(client)) { + return false; + } + + if (clients.size() == 1) { + // This client is the last one; drain metrics now + batchToSend = metricsBuffer.getAndSet(new ConcurrentLinkedQueue<>()); + isLastClient = true; + } + + clients.remove(client); + } + if (isLastClient) { + sendMetrics(client, batchToSend); + } + return true; + } + + /** + * Add a metric to the buffer. + * @param metric the metric to add + */ + void addRequest(String metric) { + if (metric != null) { + metricsBuffer.get().add(metric); + } + } + + /** + * Drain the metrics buffer and send if there are registered clients. + */ + public void drainAndSendIfReady() { + AbfsClient client; + + synchronized (this) { + if (clients.isEmpty()) { + return; + } + client = clients.iterator().next(); + } + + ConcurrentLinkedQueue<String> batch = metricsBuffer.getAndSet( + new ConcurrentLinkedQueue<>()); + if (batch.isEmpty()) { + return; + } + + sendMetrics(client, batch); + } + + // Send metrics outside synchronized block + private void sendMetrics(AbfsClient client, + ConcurrentLinkedQueue<String> batchToSend) { + // Send outside synchronized block + if (client != null && batchToSend != null && !batchToSend.isEmpty()) { + for (String chunk : splitListBySize(batchToSend, MAX_HEADER_SIZE)) { + rateLimiter.acquire(); // Rate limiting + try { + client.getMetricCall(chunk); + } catch (IOException ignored) { + // Best-effort emission: metric failures must not affect the main workload. + } + } + } + } + + // Check if there are no registered clients + public boolean isEmpty() { + return clients.isEmpty(); + } + + /** + * Split the list of metrics into chunks that fit within maxChars. + * Each metric is wrapped in square brackets and separated by colons. + */ + private static List<String> splitListBySize( + ConcurrentLinkedQueue<String> items, long maxChars) { + + if (items.isEmpty()) { + return Collections.emptyList(); + } + + List<String> result = new ArrayList<>(); + StringBuilder sb = new StringBuilder(); + + for (String s : new ArrayList<>(items)) { + String wrapped = "[" + s + "]"; + int additional = + sb.length() == 0 ?
wrapped.length() + : wrapped.length() + 1; + + if (wrapped.length() > maxChars) { + if (sb.length() > 0) { + result.add(sb.toString()); + sb.setLength(0); + } + result.add(wrapped); + continue; + } + + if (sb.length() + additional > maxChars) { + result.add(sb.toString()); + sb.setLength(0); + } + + if (sb.length() > 0) { + sb.append(':'); + } + sb.append(wrapped); + } + + if (sb.length() > 0) { + result.add(sb.toString()); + } + + return result; + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/MetricFormat.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/MetricFormat.java index 48c216ff6e5bb..fdb57e5be20ad 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/MetricFormat.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/MetricFormat.java @@ -15,6 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package org.apache.hadoop.fs.azurebfs.utils; public enum MetricFormat { @@ -24,13 +25,11 @@ public enum MetricFormat { INTERNAL_FOOTER_METRIC_FORMAT, // :: // : - INTERNAL_METRIC_FORMAT, // :: + INTERNAL_METRIC_FORMAT; // :: // :: - EMPTY; - @Override public String toString() { - return this == EMPTY ? "" : this.name(); + return this.name(); } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/ResourceUtilizationUtils.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/ResourceUtilizationUtils.java index c151a483b1853..cf47eff782d6c 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/ResourceUtilizationUtils.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/ResourceUtilizationUtils.java @@ -149,7 +149,8 @@ public static long getMaxHeapMemory() { * @return the PID of the current JVM process */ public static long getJvmProcessId() { - return ProcessHandle.current().pid(); + String jvmName = ManagementFactory.getRuntimeMXBean().getName(); + return Long.parseLong(jvmName.split("@")[0]); } /** diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/SimpleRateLimiter.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/SimpleRateLimiter.java new file mode 100644 index 0000000000000..f12dc78944eff --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/SimpleRateLimiter.java @@ -0,0 +1,73 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.utils; + +import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.locks.LockSupport; + +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidConfigurationValueException; + +/** + * A minimal rate limiter that paces callers to a fixed number of permits per + * second by advancing the next allowed permit time with a CAS loop. + */ +public class SimpleRateLimiter { + + /** The minimum interval between permits, in nanoseconds. */ + private final long intervalNanos; + + /** The next allowed time (in nanoseconds) when a permit may be issued. */ + private final AtomicLong nextAllowedTime = new AtomicLong(0); + + /** + * Creates a rate limiter with a fixed number of permits allowed per second. + * + * @param permitsPerSecond the maximum number of permits allowed per second; + * must be a positive integer + * @throws InvalidConfigurationValueException if {@code permitsPerSecond <= 0} + */ + public SimpleRateLimiter(int permitsPerSecond) + throws InvalidConfigurationValueException { + if (permitsPerSecond <= 0) { + throw new InvalidConfigurationValueException( + "Aggregated Metrics Per Second Call"); + } + this.intervalNanos = 1_000_000_000L / permitsPerSecond; + } + + /** + * Acquires a permit from the rate limiter, blocking if necessary to maintain + * the configured rate. + * + * If the current time is earlier than the next allowed permit time, this + * method blocks for the required duration. Otherwise, it proceeds + * immediately. + */ + public void acquire() { + while (true) { // The CAS below can fail under contention; retry until a slot is claimed + long now = System.nanoTime(); + long prev = nextAllowedTime.get(); + long next = Math.max(prev, now) + intervalNanos; + + if (nextAllowedTime.compareAndSet(prev, next)) { + long wait = next - now - intervalNanos; // adjust for this permit + if (wait > 0) { + LockSupport.parkNanos(wait); + } + return; + } + } + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingContext.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingContext.java index 8decba90b9f37..ca00a888c9d1d 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingContext.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingContext.java @@ -70,6 +70,7 @@ public class TracingContext { private String position = EMPTY_STRING; // position of read/write in remote file private String metricResults = EMPTY_STRING; private ReadType readType = ReadType.UNKNOWN_READ; + private boolean isMetricCall = false; private String resourceUtilizationMetricResults = EMPTY_STRING; /** @@ -154,6 +155,7 @@ public TracingContext(TracingContext originalTracingContext) { } this.metricResults = originalTracingContext.metricResults; this.readType = originalTracingContext.readType; + this.isMetricCall = originalTracingContext.isMetricCall; this.resourceUtilizationMetricResults = originalTracingContext.resourceUtilizationMetricResults; } @@ -194,6 +196,10 @@ public void setListener(Listener listener) { this.listener = listener; } + /** + * Returns true when this context represents an aggregated-metrics emission + * call; derived from the tracing header format rather than the stored flag. + */ + public boolean isMetricCall() { + return TracingHeaderFormat.AGGREGATED_METRICS_FORMAT.equals(format); + } + /** * Concatenate all components separated by (:) into a string and set into * X_MS_CLIENT_REQUEST_ID header of the http
operation @@ -243,6 +249,10 @@ public void constructHeader(AbfsHttpOperation httpOperation, String previousFail header = TracingHeaderVersion.getCurrentVersion() + COLON + clientCorrelationID + COLON + clientRequestId; break; + case AGGREGATED_METRICS_FORMAT: + header = TracingHeaderVersion.getMetricsCurrentVersion() + COLON + + metricResults; + break; default: //case SINGLE_ID_FORMAT header = TracingHeaderVersion.getCurrentVersion() + COLON diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingHeaderFormat.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingHeaderFormat.java index 3f23ae3ed7c14..55c45f3276575 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingHeaderFormat.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingHeaderFormat.java @@ -23,6 +23,9 @@ public enum TracingHeaderFormat { TWO_ID_FORMAT, // : - ALL_ID_FORMAT; // :: + ALL_ID_FORMAT, // :: // :::: + + AGGREGATED_METRICS_FORMAT; // : + // : v.trim().length() > 1, "trimmed length > 1"); - } - private static final String TEST_PATH = "/testfile"; private static final String SLEEP_PERIOD = "90000"; + public ITestAbfsReadFooterMetrics() throws Exception { + } + /** * Integration test for reading footer metrics with both Parquet and non-Parquet reads. */ diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java index 4bf1f56e7eecc..64d0e35fac869 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java @@ -18,6 +18,7 @@ package org.apache.hadoop.fs.azurebfs.services; +import java.io.FileNotFoundException; import java.io.IOException; import java.net.ProtocolException; import java.net.URI; @@ -26,6 +27,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.UUID; import java.util.concurrent.TimeUnit; import java.util.regex.Pattern; @@ -38,6 +40,7 @@ import org.mockito.Mockito; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; @@ -49,19 +52,24 @@ import org.apache.hadoop.fs.azurebfs.constants.AbfsServiceType; import org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys; import org.apache.hadoop.fs.azurebfs.constants.FSOperationType; +import org.apache.hadoop.fs.azurebfs.constants.FileSystemUriSchemes; import org.apache.hadoop.fs.azurebfs.constants.HttpOperationType; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsApacheHttpExpect100Exception; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.TokenAccessProviderException; import org.apache.hadoop.fs.azurebfs.contracts.services.AppendRequestParameters; +import org.apache.hadoop.fs.azurebfs.enums.AbfsBackoffMetricsEnum; import org.apache.hadoop.fs.azurebfs.extensions.SASTokenProvider; import org.apache.hadoop.fs.azurebfs.oauth2.AccessTokenProvider; +import org.apache.hadoop.fs.azurebfs.utils.MetricFormat; import org.apache.hadoop.fs.azurebfs.utils.TracingContext; import 
org.apache.hadoop.fs.azurebfs.utils.TracingHeaderFormat; +import org.apache.hadoop.fs.azurebfs.utils.UriUtils; import org.apache.hadoop.security.ssl.DelegatingSSLSocketFactory; import org.apache.hadoop.test.ReflectionUtils; import org.apache.http.HttpClientConnection; import org.apache.http.HttpResponse; +import org.apache.http.client.utils.URIBuilder; import static java.net.HttpURLConnection.HTTP_NOT_FOUND; import static org.apache.hadoop.fs.azurebfs.ITestAzureBlobFileSystemListStatus.TEST_CONTINUATION_TOKEN; @@ -72,9 +80,19 @@ import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EMPTY_STRING; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EXPECT_100_JDK_ERROR; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.FORWARD_SLASH; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_HEAD; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_PATCH; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_PUT; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HUNDRED_CONTINUE; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_CREATE_REMOTE_FILESYSTEM_DURING_INITIALIZATION; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ACCOUNT_IS_HNS_ENABLED; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ALWAYS_USE_HTTPS; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRICS_COLLECTION_ENABLED; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRICS_EMIT_THRESHOLD_INTERVAL_SECS; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRIC_ACCOUNT_KEY; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRIC_ACCOUNT_NAME; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRIC_FORMAT; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRIC_EMIT_THRESHOLD; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.JAVA_VENDOR; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.JAVA_VERSION; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.OS_ARCH; @@ -82,13 +100,10 @@ import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.OS_VERSION; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.SEMICOLON; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.SINGLE_WHITE_SPACE; -import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_CREATE_REMOTE_FILESYSTEM_DURING_INITIALIZATION; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ACCOUNT_AUTH_TYPE_PROPERTY_NAME; -import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ACCOUNT_IS_HNS_ENABLED; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_APACHE_HTTP_CLIENT_CACHE_WARMUP_COUNT; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_CLUSTER_NAME; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_CLUSTER_TYPE; -import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRIC_ACCOUNT_NAME; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DEFAULT_VALUE_UNKNOWN; import static 
org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.EXPECT; import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_HTTP_METHOD_OVERRIDE; @@ -99,8 +114,11 @@ import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.FS_AZURE_ABFS_ACCOUNT_NAME; import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.TEST_CONFIGURATION_FILE_NAME; import static org.apache.hadoop.fs.azurebfs.services.AuthType.SharedKey; +import static org.apache.hadoop.fs.azurebfs.services.RetryPolicyConstants.EXPONENTIAL_RETRY_POLICY_ABBREVIATION; +import static org.apache.hadoop.fs.azurebfs.utils.MetricFormat.INTERNAL_BACKOFF_METRIC_FORMAT; import static org.apache.hadoop.test.LambdaTestUtils.intercept; import static org.assertj.core.api.Assumptions.assumeThat; +import static org.assertj.core.api.Assertions.assertThat; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.mock; @@ -181,10 +199,10 @@ private String getUserAgentString(AbfsConfiguration config, AbfsClient client; if (AbfsServiceType.DFS.equals(config.getFsConfiguredServiceType())) { client = new AbfsDfsClient(new URL("https://azure.com"), null, - config, (AccessTokenProvider) null, null, null, abfsClientContext); + config, (AccessTokenProvider) null, null, null, abfsClientContext, UUID.randomUUID().toString()); } else { client = new AbfsBlobClient(new URL("https://azure.com"), null, - config, (AccessTokenProvider) null, null, null, abfsClientContext); + config, (AccessTokenProvider) null, null, null, abfsClientContext, UUID.randomUUID().toString()); } String sslProviderName = null; if (includeSSLProvider) { @@ -439,7 +457,7 @@ public static AbfsClient createTestClientFromCurrentContext( : null), null, null, - abfsClientContext); + abfsClientContext, UUID.randomUUID().toString()); } else { testClient = new AbfsBlobClient( baseAbfsClientInstance.getBaseUrl(), @@ -455,7 +473,7 @@ public static AbfsClient createTestClientFromCurrentContext( : null), null, null, - abfsClientContext); + abfsClientContext, UUID.randomUUID().toString()); } return testClient; @@ -493,7 +511,7 @@ public static AbfsClient createBlobClientFromCurrentContext( : null), null, null, - abfsClientContext); + abfsClientContext, UUID.randomUUID().toString()); return testClient; } @@ -876,6 +894,299 @@ public void testIsNonEmptyDirectory() throws IOException { false, 1, true); } + /** + * Test to verify that in case metric account is not set, + * metric collection is enabled with default metric format + * and account url. 
+   *
+   * @throws Exception in case of any failure
+   */
+  @Test
+  public void testMetricAccountFallback() throws Exception {
+    Configuration configuration = getRawConfiguration();
+    configuration.setBoolean(
+        AZURE_CREATE_REMOTE_FILESYSTEM_DURING_INITIALIZATION, false);
+    configuration.setBoolean(FS_AZURE_ACCOUNT_IS_HNS_ENABLED, false);
+    configuration.unset(FS_AZURE_METRIC_ACCOUNT_NAME);
+    configuration.unset(FS_AZURE_METRIC_ACCOUNT_KEY);
+    configuration.unset(FS_AZURE_METRIC_FORMAT);
+    configuration.setBoolean(FS_AZURE_ALWAYS_USE_HTTPS, true);
+    final AzureBlobFileSystem fs = getFileSystem(configuration);
+    Assertions.assertThat(
+            fs.getAbfsStore().getAbfsConfiguration().getMetricFormat())
+        .describedAs(
+            "In case metric format is not set, metric format should "
+                + "be defaulted to internal metric format")
+        .isEqualTo(MetricFormat.INTERNAL_METRIC_FORMAT);
+
+    Assertions.assertThat(
+            fs.getAbfsStore().getClient().isMetricCollectionEnabled())
+        .describedAs(
+            "Metric collection should be enabled even if metric account is not set")
+        .isTrue();
+
+    Assertions.assertThat(
+            fs.getAbfsStore().getClient().getAbfsCounters().toString())
+        .describedAs(
+            "AbfsCounters should not contain backoff related metrics "
+                + "as no metric is collected for backoff")
+        .doesNotContain("#BO:");
+
+    Assertions.assertThat(
+            fs.getAbfsStore().getClient().getAbfsCounters().toString())
+        .describedAs(
+            "AbfsCounters should not contain read footer related metrics "
+                + "as no metric is collected for read footer")
+        .doesNotContain("#FO:");
+
+    final URIBuilder uriBuilder = new URIBuilder();
+    uriBuilder.setScheme(FileSystemUriSchemes.HTTPS_SCHEME);
+    uriBuilder.setHost(fs.getUri().getHost());
+    uriBuilder.setPath(FORWARD_SLASH);
+    Assertions.assertThat(fs.getAbfsStore().getClient().getMetricsUrl())
+        .describedAs(
+            "In case metric account is not set, account URL should be used")
+        .isEqualTo(
+            UriUtils.changeUrlFromBlobToDfs(uriBuilder.build().toURL()));
+  }
+
+  /**
+   * Test to verify that when the metric format is set to the internal
+   * backoff metric format, metric collection stays enabled and only
+   * backoff metrics are collected.
+   *
+   * @throws Exception in case of any failure
+   */
+  @Test
+  public void testMetricCollectionWithDifferentMetricFormat() throws Exception {
+    Configuration configuration = getRawConfiguration();
+    // Setting this configuration just to ensure there is only one call during filesystem initialization
+    configuration.setBoolean(
+        AZURE_CREATE_REMOTE_FILESYSTEM_DURING_INITIALIZATION, true);
+    configuration.setBoolean(FS_AZURE_ACCOUNT_IS_HNS_ENABLED, false);
+    configuration.unset(FS_AZURE_METRIC_FORMAT);
+    configuration.setEnum(FS_AZURE_METRIC_FORMAT,
+        INTERNAL_BACKOFF_METRIC_FORMAT);
+    final AzureBlobFileSystem fs = getFileSystem(configuration);
+    int totalCalls = 1; // Filesystem initialization call
+    Assertions.assertThat(
+            fs.getAbfsStore().getClient().isMetricCollectionEnabled())
+        .describedAs("Metric collection should be enabled by default")
+        .isTrue();
+
+    Assertions.assertThat(
+            fs.getAbfsStore().getAbfsConfiguration().getMetricFormat())
+        .describedAs("Metric format should be as set in configuration")
+        .isEqualTo(INTERNAL_BACKOFF_METRIC_FORMAT);
+
+    Assertions.assertThat(
+            fs.getAbfsStore().getClient().getAbfsCounters().toString())
+        .describedAs(
+            "AbfsCounters should only contain backoff related metrics when "
+                + "metric format is internal backoff metric format")
+        .contains("#BO:");
+
+    Assertions.assertThat(
+            fs.getAbfsStore().getClient().getAbfsCounters().toString())
+        .describedAs(
+            "AbfsCounters should not contain read footer related metrics when "
+                + "metric format is internal backoff metric format")
+        .doesNotContain("#FO:");
+
+    Assertions.assertThat(fs.getAbfsStore()
+            .getClient()
+            .getAbfsCounters()
+            .getAbfsBackoffMetrics()
+            .getMetricValue(
+                AbfsBackoffMetricsEnum.TOTAL_NUMBER_OF_REQUESTS))
+        .describedAs(
+            "Total number of requests should be 1 for filesystem initialization")
+        .isEqualTo(totalCalls);
+
+
+    if (fs.getAbfsStore().getClient() instanceof AbfsDfsClient) {
+      intercept(FileNotFoundException.class,
+          "The specified path does not exist.",
+          () -> fs.listStatus(path("/testPath")));
+      totalCalls += 1; // listStatus call
+    } else {
+      intercept(FileNotFoundException.class,
+          "The specified blob does not exist.",
+          () -> fs.listStatus(path("/testPath")));
+      totalCalls += 2; // listStatus call makes 2 calls to the service
+    }
+
+    Assertions.assertThat(fs.getAbfsStore()
+            .getClient()
+            .getAbfsCounters()
+            .getAbfsBackoffMetrics()
+            .getMetricValue(
+                AbfsBackoffMetricsEnum.TOTAL_NUMBER_OF_REQUESTS))
+        .describedAs(
+            "Total number of requests should match the accumulated count after listStatus")
+        .isEqualTo(totalCalls);
+  }
+
+  /**
+   * Test to verify that clientRequestId contains backoff metrics
+   * when metric format is set to internal backoff metric format.
+   *
+   * @throws Exception in case of any failure
+   */
+  @Test
+  public void testGetMetricsCallMethod() throws Exception {
+    // File system init will make a few calls to the service.
+    // Backoff metrics will be collected for those calls.
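+    // Note: with AGGREGATED_METRICS_FORMAT, constructHeader() is expected to
+    // append the supplied metric results (here the counters snapshot) to the
+    // client request id, which is what the "#BO:" assertion below checks.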
AzureBlobFileSystem fs = getFileSystem();
+    TracingContext tracingContext = new TracingContext(
+        fs.getAbfsStore().getAbfsConfiguration().getClientCorrelationId(),
+        "test-filesystem-id", FSOperationType.TEST_OP, true,
+        TracingHeaderFormat.AGGREGATED_METRICS_FORMAT, null,
+        fs.getAbfsStore().getClient().getAbfsCounters().toString());
+
+    AbfsHttpOperation abfsHttpOperation = getAbfsClient(
+        fs.getAbfsStore()).getAbfsRestOperation(
+            AbfsRestOperationType.GetFileSystemProperties,
+            HTTP_METHOD_HEAD,
+            fs.getAbfsStore().getClient().getMetricsUrl(),
+            getTestRequestHeaders(fs.getAbfsStore().getClient()))
+        .createHttpOperation();
+    tracingContext.constructHeader(abfsHttpOperation, null,
+        EXPONENTIAL_RETRY_POLICY_ABBREVIATION);
+    assertThat(abfsHttpOperation.getClientRequestId())
+        .describedAs("ClientRequestId should contain backoff metrics")
+        .contains("#BO:");
+  }
+
+  /**
+   * Verifies that metrics are emitted when the threshold is low.
+   */
+  @Test
+  public void testMetricsEmitBasedOnCount() throws Exception {
+    runMetricsEmitTest(10L, true);
+  }
+
+  /**
+   * Verifies that metrics are not emitted when the threshold is high.
+   */
+  @Test
+  public void testMetricsEmitWithHighThreshold() throws Exception {
+    runMetricsEmitTest(100L, false);
+  }
+
+  /**
+   * Runs a metrics emit test for a given threshold and expected behavior.
+   * Uses the same write/flush pattern and asserts based on emit expectation.
+   */
+  private void runMetricsEmitTest(long threshold, boolean expectEmit)
+      throws Exception {
+    AzureBlobFileSystem fs = getFileSystem();
+    Configuration configuration = fs.getAbfsStore()
+        .getAbfsConfiguration()
+        .getRawConfiguration();
+    int totalWaitTime = 30;
+    configuration.setLong(FS_AZURE_METRIC_EMIT_THRESHOLD, threshold);
+    configuration.setLong(FS_AZURE_METRICS_EMIT_THRESHOLD_INTERVAL_SECS, totalWaitTime);
+    fs = (AzureBlobFileSystem) FileSystem.newInstance(configuration);
+
+    // Initial total metrics
+    long totalMetrics = fs.getAbfsStore().getClient().getAbfsCounters()
+        .getAbfsBackoffMetrics()
+        .getMetricValue(AbfsBackoffMetricsEnum.TOTAL_NUMBER_OF_REQUESTS);
+
+    // Create file
+    Path testPath = path(TEST_PATH);
+    FSDataOutputStream stream = fs.create(testPath);
+    if (fs.getAbfsStore()
+        .getClientHandler()
+        .getIngressClient() instanceof AbfsDfsClient) {
+      // requests made for create over the DFS endpoint
+      totalMetrics += 1;
+    } else {
+      // requests made for create over the Blob endpoint
+      totalMetrics += 4;
+    }
+
+    Assertions.assertThat(currentTotal(fs))
+        .describedAs("Total number of requests should increase after create")
+        .isEqualTo(totalMetrics);
+
+    // Append data
+    final int writeSize = 1024 * 1024;
+    final int numWrites = 10;
+    final byte dataByte = 5;
+    byte[] data = new byte[writeSize];
+    Arrays.fill(data, dataByte);
+
+    for (int i = 0; i < numWrites; i++) {
+      stream.write(data);
+      stream.hflush();
+      if (fs.getAbfsStore()
+          .isAppendBlobKey(fs.makeQualified(testPath).toString())) {
+        totalMetrics += 1; // write + flush count as 1 request
+      } else {
+        totalMetrics += 2; // write + flush count as 2 requests
+      }
+    }
+
+    if (fs.getAbfsStore()
+        .getClientHandler()
+        .getIngressClient() instanceof AbfsDfsClient) {
+      totalMetrics += 1; // One extra call for flush in case of DFS client
+    }
+
+    // Close stream
+    stream.close();
+
+    // Before waiting for emit scheduler to run, total metrics should match
+    Assertions.assertThat(currentTotal(fs))
+        .describedAs(
+            "Total requests should match counted requests before the emit scheduler runs")
+        .isEqualTo(totalMetrics);
+    // Wait for emit scheduler to run
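+    // (The scheduler period equals FS_AZURE_METRICS_EMIT_THRESHOLD_INTERVAL_SECS,
+    // set to totalWaitTime above, so sleeping for the full interval should
+    // allow at least one scheduler pass before the assertions below.)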
Thread.sleep(totalWaitTime * 1000); // wait for the full threshold interval
+
+    if (expectEmit) {
+      Assertions.assertThat(currentTotal(fs))
+          .describedAs(
+              "Once the emit scheduler has run, total requests should be reset to 0")
+          .isEqualTo(0);
+    } else {
+      Assertions.assertThat(currentTotal(fs))
+          .describedAs(
+              "In case threshold is high, total requests should remain the same after emit scheduler run")
+          .isEqualTo(totalMetrics);
+    }
+  }
+
+  @Test
+  public void testAggregateMetricsConfigs() throws Exception {
+    Configuration configuration = getRawConfiguration();
+    // Disabling the aggregate metrics collection
+    configuration.setBoolean(FS_AZURE_METRICS_COLLECTION_ENABLED, false);
+    AzureBlobFileSystem fs = this.getFileSystem(configuration);
+    Assertions.assertThat(fs.getAbfsStore().getClient().getMetricsEmitScheduler())
+        .describedAs("Since metrics collection is not enabled, "
+            + "scheduler should not be initialised")
+        .isNull();
+
+    // Enabling the aggregate metrics collection
+    configuration.setBoolean(FS_AZURE_METRICS_COLLECTION_ENABLED, true);
+    fs = this.getFileSystem(configuration);
+    Assertions.assertThat(fs.getAbfsStore().getClient().getMetricsEmitScheduler())
+        .describedAs("Since metrics collection is enabled, "
+            + "scheduler should be initialised")
+        .isNotNull();
+  }
+
+  /**
+   * Returns the current total number of requests from AbfsBackoffMetrics.
+   */
+  private long currentTotal(AzureBlobFileSystem fs) {
+    return fs.getAbfsStore().getClient().getAbfsCounters()
+        .getAbfsBackoffMetrics()
+        .getMetricValue(AbfsBackoffMetricsEnum.TOTAL_NUMBER_OF_REQUESTS);
+  }
+
   private void testIsNonEmptyDirectoryInternal(String firstCT,
       boolean isfirstEmpty, String secondCT, boolean isSecondEmpty,
       int expectedInvocations, boolean isNonEmpty) throws IOException {
@@ -1241,4 +1552,6 @@ private void checkConnectionReuse(AbfsClient abfsClient) throws IOException {
           .isEqualTo(keepAliveCache.peekLast());
     }
   }
+
+
 }
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsClient.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsClient.java
index 039e01ed09581..0db7f51986db3 100644
--- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsClient.java
+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsClient.java
@@ -21,6 +21,7 @@
 import java.net.URI;
 import java.net.URL;
 import java.util.Map;
+import java.util.UUID;
 
 import org.junit.jupiter.api.Test;
 
@@ -29,12 +30,9 @@
 import org.apache.hadoop.fs.azurebfs.AbfsCountersImpl;
 import org.apache.hadoop.fs.azurebfs.MockIntercept;
 import org.apache.hadoop.fs.azurebfs.oauth2.AccessTokenProvider;
-import org.apache.hadoop.fs.azurebfs.utils.Base64;
-import org.apache.hadoop.fs.azurebfs.utils.MetricFormat;
 
-import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRIC_ACCOUNT_KEY;
-import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRIC_ACCOUNT_NAME;
 import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRIC_FORMAT;
+import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_SHOULD_EMIT_METRICS_ON_IDLE_TIME;
 import static org.apache.hadoop.fs.azurebfs.services.AbfsClient.ABFS_CLIENT_TIMER_THREAD_NAME;
 import static org.assertj.core.api.Assertions.assertThat;
 import static org.mockito.ArgumentMatchers.any;
@@ -60,18 +58,19 @@ public void testTimerInitializationWithoutMetricCollection() throws Exception {
     final
Configuration configuration = new Configuration(); AbfsConfiguration abfsConfiguration = new AbfsConfiguration(configuration, ACCOUNT_NAME); abfsConfiguration.unset(FS_AZURE_METRIC_FORMAT); + configuration.setBoolean(FS_AZURE_SHOULD_EMIT_METRICS_ON_IDLE_TIME, false); AbfsCounters abfsCounters = spy(new AbfsCountersImpl(new URI("abcd"))); AbfsClientContext abfsClientContext = new AbfsClientContextBuilder().withAbfsCounters(abfsCounters).build(); // Get an instance of AbfsClient. - AbfsClient client = new AbfsDfsClient(new URL("https://azure.com"), + AbfsClient client = new AbfsDfsClient(new URL("https://" + ACCOUNT_NAME + "/"), null, abfsConfiguration, (AccessTokenProvider) null, null, null, - abfsClientContext); + abfsClientContext, UUID.randomUUID().toString()); assertThat(client.getTimer()) .describedAs("Timer should not be initialized") @@ -93,22 +92,20 @@ public void testTimerInitializationWithoutMetricCollection() throws Exception { @Test public void testTimerInitializationWithMetricCollection() throws Exception { final Configuration configuration = new Configuration(); - configuration.set(FS_AZURE_METRIC_FORMAT, String.valueOf(MetricFormat.INTERNAL_BACKOFF_METRIC_FORMAT)); - configuration.set(FS_AZURE_METRIC_ACCOUNT_NAME, ACCOUNT_NAME); - configuration.set(FS_AZURE_METRIC_ACCOUNT_KEY, Base64.encode(ACCOUNT_KEY.getBytes())); + configuration.setBoolean(FS_AZURE_SHOULD_EMIT_METRICS_ON_IDLE_TIME, true); AbfsConfiguration abfsConfiguration = new AbfsConfiguration(configuration, ACCOUNT_NAME); AbfsCounters abfsCounters = spy(new AbfsCountersImpl(new URI("abcd"))); AbfsClientContext abfsClientContext = new AbfsClientContextBuilder().withAbfsCounters(abfsCounters).build(); // Get an instance of AbfsClient. - AbfsClient client = new AbfsDfsClient(new URL("https://azure.com"), + AbfsClient client = new AbfsDfsClient(new URL("https://" + ACCOUNT_NAME + "/"), null, abfsConfiguration, (AccessTokenProvider) null, null, null, - abfsClientContext); + abfsClientContext, UUID.randomUUID().toString()); assertThat(client.getTimer()) .describedAs("Timer should be initialized") diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRenameRetryRecovery.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRenameRetryRecovery.java index 997818b5f9f96..a4178a9e64127 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRenameRetryRecovery.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRenameRetryRecovery.java @@ -231,7 +231,7 @@ private void executeThenFail(final AbfsClient client, final byte[] buffer = answer.getArgument(0); final int offset = answer.getArgument(1); final int length = answer.getArgument(2); - normalRestOp.signRequest(normalOp, length); + normalRestOp.signRequest(normalOp, length, false); normalOp.sendPayload(buffer, offset, length); normalOp.processResponse(buffer, offset, length); LOG.info("Actual outcome is {} \"{}\" \"{}\"; injecting failure", diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRestOperation.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRestOperation.java index 62f6e253fb518..a15c16118ef96 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRestOperation.java +++ 
b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRestOperation.java @@ -26,10 +26,7 @@ import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_DELETE; import static org.apache.hadoop.fs.azurebfs.enums.AbfsBackoffMetricsEnum.NUMBER_OF_REQUESTS_FAILED; import static org.apache.hadoop.fs.azurebfs.services.AbfsRestOperationType.DeletePath; -import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRIC_ACCOUNT_KEY; -import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRIC_ACCOUNT_NAME; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRIC_FORMAT; -import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRIC_URI; import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem; import org.apache.hadoop.fs.azurebfs.AbstractAbfsIntegrationTest; import java.util.ArrayList; @@ -42,12 +39,6 @@ public class TestAbfsRestOperation extends public TestAbfsRestOperation() throws Exception { } - private void checkPrerequisites() { - assumeValidTestConfigPresent(getRawConfiguration(), FS_AZURE_METRIC_ACCOUNT_NAME); - assumeValidTestConfigPresent(getRawConfiguration(), FS_AZURE_METRIC_ACCOUNT_KEY); - assumeValidTestConfigPresent(getRawConfiguration(), FS_AZURE_METRIC_URI); - } - /** * Test for backoff retry metrics. * @@ -58,7 +49,6 @@ private void checkPrerequisites() { */ @Test public void testBackoffRetryMetrics() throws Exception { - checkPrerequisites(); // Create an AzureBlobFileSystem instance. final Configuration configuration = getRawConfiguration(); configuration.set(FS_AZURE_METRIC_FORMAT, String.valueOf(MetricFormat.INTERNAL_BACKOFF_METRIC_FORMAT)); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAggregateMetricsManager.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAggregateMetricsManager.java new file mode 100644 index 0000000000000..538ba54f5d0ac --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAggregateMetricsManager.java @@ -0,0 +1,708 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.hadoop.fs.azurebfs.services;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardCopyOption;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.UUID;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicLong;
+
+import org.assertj.core.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.mockito.Mockito;
+
+import org.apache.hadoop.fs.azurebfs.AbstractAbfsIntegrationTest;
+import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem;
+import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystemStore;
+
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.COLON;
+
+public class TestAggregateMetricsManager extends AbstractAbfsIntegrationTest {
+
+  // The manager under test
+  private AggregateMetricsManager manager;
+
+  /**
+   * Constructor for TestAggregateMetricsManager.
+   *
+   * @throws Exception if an error occurs during setup
+   */
+  public TestAggregateMetricsManager() throws Exception {
+    super();
+  }
+
+  /**
+   * Resets the AggregateMetricsManager instance before each test.
+   */
+  @BeforeEach
+  void resetManager() throws Exception {
+    manager = AggregateMetricsManager.get(10, 3);
+  }
+
+  /**
+   * Builds a sample aggregated-metrics payload string for dispatch tests.
+   */
+  private String getMetricsData() {
+    return UUID.randomUUID() + COLON + UUID.randomUUID() + COLON + "#BO:";
+  }
+
+  /**
+   * Wraps an AbfsClient instance in a spy and counts metric call invocations.
+   */
+  private AbfsClient spyClient(AzureBlobFileSystem azureBlobFileSystem,
+      AtomicInteger counter)
+      throws IOException {
+    AzureBlobFileSystemStore store = Mockito.spy(
+        azureBlobFileSystem.getAbfsStore());
+    Mockito.doReturn(store).when(azureBlobFileSystem).getAbfsStore();
+    AbfsClient client = Mockito.spy(store.getClient());
+    Mockito.doReturn(client).when(store).getClient();
+
+    Mockito.doAnswer(inv -> {
+      counter.incrementAndGet();
+      return inv.callRealMethod();
+    }).when(client).getMetricCall(Mockito.any());
+
+    return client;
+  }
+
+  /**
+   * Verifies that multiple recordMetric calls result in exactly one
+   * aggregated dispatch when the client deregisters.
+   */
+  @Test
+  public void testRecordMetric() throws Exception {
+    AtomicInteger calls = new AtomicInteger(0);
+    AbfsClient client = spyClient(Mockito.spy(this.getFileSystem()), calls);
+    manager.registerClient("acc1", client);
+    for (int i = 0; i < 5; i++) {
+      manager.recordMetric("acc1", getMetricsData());
+    }
+    manager.deregisterClient("acc1", client);
+
+    Assertions.assertThat(calls.get())
+        .describedAs("Expected exactly 1 aggregated metrics send")
+        .isEqualTo(1);
+  }
+
+  /**
+   * Verifies that aggregated metrics for the same account are sent
+   * once per dispatch window, meaning 2 windows → 2 sends.
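+   * A "window" here spans registerClient to the final deregisterClient for
+   * the account; deregistration flushes the pending bucket, so each
+   * register/record/deregister cycle yields one send.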
+   */
+  @Test
+  public void testRecordMetricTwoWindows() throws Exception {
+    AtomicInteger calls = new AtomicInteger(0);
+    AbfsClient client = spyClient(Mockito.spy(this.getFileSystem()), calls);
+    manager.registerClient("acc1", client);
+    for (int i = 0; i < 5; i++) {
+      manager.recordMetric("acc1", getMetricsData());
+    }
+
+    manager.deregisterClient("acc1", client);
+    Assertions.assertThat(calls.get())
+        .describedAs("Expected 1 aggregated send")
+        .isEqualTo(1);
+
+    manager.registerClient("acc1", client);
+    // Second window
+    for (int i = 0; i < 5; i++) {
+      manager.recordMetric("acc1", getMetricsData());
+    }
+    manager.deregisterClient("acc1", client);
+
+    Assertions.assertThat(calls.get())
+        .describedAs("Expected 2 aggregated sends")
+        .isEqualTo(2);
+  }
+
+  /**
+   * Ensures that recordMetric handles invalid input without exceptions.
+   */
+  @Test
+  public void testRecordMetricWithNulls() throws Exception {
+    manager.recordMetric(null, null);
+    manager.recordMetric("", null);
+    manager.recordMetric("acc", null);
+    manager.recordMetric("acc", null);
+  }
+
+  /**
+   * Ensures that metrics for separate accounts still respect global
+   * rate limiting but send independently within the same window.
+   */
+  @Test
+  public void testMultipleAccounts() throws Exception {
+
+    AtomicInteger calls1 = new AtomicInteger();
+    AbfsClient client1 = spyClient(Mockito.spy(this.getFileSystem()), calls1);
+
+    AtomicInteger calls2 = new AtomicInteger();
+    AbfsClient client2 = spyClient(Mockito.spy(
+        (AzureBlobFileSystem) AzureBlobFileSystem.newInstance(
+            getRawConfiguration())), calls2);
+
+    manager.registerClient("acc1", client1);
+    manager.registerClient("acc2", client2);
+    manager.recordMetric("acc1", getMetricsData());
+    manager.recordMetric("acc2", getMetricsData());
+    manager.deregisterClient("acc1", client1);
+    manager.deregisterClient("acc2", client2);
+
+    Assertions.assertThat(calls1.get())
+        .describedAs("Account 1 dispatched once")
+        .isEqualTo(1);
+
+    Assertions.assertThat(calls2.get())
+        .describedAs("Account 2 dispatched once")
+        .isEqualTo(1);
+  }
+
+  /**
+   * Tests concurrent registration, metric recording, and deregistration
+   * of multiple clients for the same account.
+   */
+  @Test
+  public void testMultipleClientsRegistryInParallel() throws Exception {
+    AtomicInteger calls1 = new AtomicInteger(0);
+    AbfsClient client1 = spyClient(Mockito.spy(this.getFileSystem()), calls1);
+
+    AtomicInteger calls2 = new AtomicInteger(0);
+    AbfsClient client2 = spyClient(Mockito.spy(this.getFileSystem()), calls2);
+
+    CountDownLatch latch = new CountDownLatch(5);
+
+    new Thread(() -> {
+      try {
+        manager.registerClient("acc1", client1);
+      } finally {
+        latch.countDown();
+      }
+    }).start();
+
+    new Thread(() -> {
+      try {
+        manager.registerClient("acc1", client2);
+      } finally {
+        latch.countDown();
+      }
+    }).start();
+
+    new Thread(() -> {
+      try {
+        for (int i = 0; i < 10; i++) {
+          manager.recordMetric("acc1", getMetricsData());
+        }
+      } finally {
+        latch.countDown();
+      }
+    }).start();
+
+    new Thread(() -> {
+      try {
+        manager.deregisterClient("acc1", client1);
+      } finally {
+        latch.countDown();
+      }
+    }).start();
+
+    new Thread(() -> {
+      try {
+        manager.deregisterClient("acc1", client2);
+      } finally {
+        latch.countDown();
+      }
+    }).start();
+
+    // wait for all threads to finish
+    latch.await();
+
+    Assertions.assertThat((calls1.get() == 1) ^ (calls2.get() == 1))
+        .describedAs("Exactly one client should send metrics")
+        .isTrue();
+  }
+
+  /**
+   * Tests deregistering a nonexistent client.
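+   * Deregistering an account that was never registered is expected to be a
+   * safe no-op that returns false rather than throwing.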
+   */
+  @Test
+  public void testDeregisterNonexistentClient() throws IOException {
+    AbfsClient client = Mockito.spy(
+        this.getFileSystem().getAbfsStore().getClient());
+    // Should not throw
+    boolean isRemoved = manager.deregisterClient("nonexistentAccount", client);
+    Assertions.assertThat(isRemoved)
+        .describedAs("Deregistering nonexistent client should return false")
+        .isFalse();
+  }
+
+  /**
+   * Tests that when the aggregated metric data exceeds the buffer size,
+   * multiple dispatches occur as expected.
+   */
+  @Test
+  public void testMultipleMetricCallsInCaseDataIsMoreThanBufferSize()
+      throws Exception {
+    AtomicInteger calls = new AtomicInteger(0);
+    AzureBlobFileSystem azureBlobFileSystem = Mockito.spy(this.getFileSystem());
+    AzureBlobFileSystemStore store = Mockito.spy(
+        azureBlobFileSystem.getAbfsStore());
+    Mockito.doReturn(store).when(azureBlobFileSystem).getAbfsStore();
+    AbfsClient client = Mockito.spy(store.getClient());
+    Mockito.doReturn(client).when(store).getClient();
+
+    Mockito.doAnswer(inv -> {
+      String data = (String) inv.getArguments()[0];
+      if (calls.get() < 3) { // first three calls, data size will be 927 chars
+        Assertions.assertThat(data.length())
+            .describedAs("Aggregated metric data size should be 927 chars")
+            .isEqualTo(927);
+      } else { // last call, data size will be 115 chars
+        Assertions.assertThat(data.length())
+            .describedAs("Aggregated metric data size should be 115 chars")
+            .isEqualTo(115);
+      }
+      calls.incrementAndGet();
+      return inv.callRealMethod();
+    }).when(client).getMetricCall(Mockito.any());
+    manager.registerClient("acc1", client);
+    for (int i = 0; i < 25; i++) {
+      manager.recordMetric("acc1", getMetricsData()
+          + "$OT=163$RT=6.024%$TRNR=2543$TR=2706"); // each data is 113 chars
+    }
+    manager.deregisterClient("acc1", client);
+
+    // 113 + 2 ([,]) = 115 chars per metric, 115 * 25 = 2875 chars total + 24 (:) = 2899 chars
+    // 1st -> 115 * 8 = 920 chars + 7 (:) = 927 chars
+    // 2nd -> 115 * 8 = 920 chars + 7 (:) = 927 chars
+    // 3rd -> 115 * 8 = 920 chars + 7 (:) = 927 chars
+    // 4th -> remaining
+    Assertions.assertThat(calls.get())
+        .describedAs("Expected exactly 4 aggregated metric sends")
+        .isEqualTo(4);
+  }
+
+  /**
+   * Verifies that when multiple clients send metrics concurrently,
+   * the global rate limiter enforces spacing between dispatches.
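+   * With 3 permits per second the limiter should leave roughly 333ms between
+   * consecutive getMetricCall invocations; the pairwise timestamp comparison
+   * below verifies that spacing.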
+   */
+  @Test
+  public void testRateLimitMetricCalls() throws IOException, InterruptedException {
+    int permitsPerSecond = 3;
+    long minIntervalMs = 1_000 / permitsPerSecond; // 333ms
+    double toleranceMs = 15.0; // allow 15ms jitter
+    int numClients = 10;
+
+    // Store timestamps for each client
+    final List<AtomicLong> times = new ArrayList<>();
+    AbfsClient[] abfsClients = new AbfsClient[numClients];
+
+    for (int i = 0; i < numClients; i++) {
+      AtomicLong time = new AtomicLong();
+      times.add(time);
+
+      AbfsClient client = createSpiedClient(time);
+      abfsClients[i] = client;
+      manager.registerClient("acc" + i, client);
+    }
+
+    // Record metrics for all clients
+    for (int i = 0; i < numClients; i++) {
+      manager.recordMetric("acc" + i, getMetricsData());
+    }
+
+    // Deregister all clients concurrently
+    CountDownLatch latch = new CountDownLatch(numClients);
+    for (int i = 0; i < numClients; i++) {
+      final int idx = i;
+      new Thread(() -> {
+        try {
+          manager.deregisterClient("acc" + idx, abfsClients[idx]);
+        } finally {
+          latch.countDown();
+        }
+      }).start();
+    }
+    latch.await();
+
+    // Check that interval between any two timestamps is ≥ minIntervalMs - tolerance
+    for (int i = 0; i < times.size(); i++) {
+      for (int j = i + 1; j < times.size(); j++) {
+        double diffMs = Math.abs(times.get(i).get() - times.get(j).get())
+            / 1_000_000.0;
+        Assertions.assertThat(diffMs)
+            .describedAs(
+                "Expected at least %.3f ms (tolerance %.3f) between metric sends",
+                minIntervalMs, toleranceMs)
+            .isGreaterThanOrEqualTo(minIntervalMs - toleranceMs);
+      }
+    }
+  }
+
+
+  /**
+   * Tests that the shutdown hook flushes metrics on JVM exit.
+   */
+  @Test
+  public void testAggregatedMetricsManagerWithJVMExit0()
+      throws IOException, InterruptedException {
+    // -------------------------------
+    // Program 1: single client, clean System.exit(0)
+    // -------------------------------
+    String program =
+        "import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem;\n" +
+            "import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystemStore;\n" +
+            "import org.apache.hadoop.fs.azurebfs.services.*;\n" +
+            "import org.apache.hadoop.fs.azurebfs.utils.*;\n" +
+            "import org.apache.hadoop.conf.Configuration;\n" +
+            "import org.apache.hadoop.fs.FileSystem;\n" +
+            "import java.util.*;\n" +
+            "import java.util.concurrent.atomic.AtomicInteger;\n" +
+            "import java.io.IOException;\n" +
+            "import java.net.URI;\n" +
+            "import org.mockito.Mockito;\n" +
+            "\n" +
+            "public class ShutdownTestProg {\n" +
+            "  public static void main(String[] args) throws Exception {\n" +
+            "    AtomicInteger calls1 = new AtomicInteger();\n" +
+            "    AggregateMetricsManager mgr = AggregateMetricsManager.get(10, 3);\n" +
+            "\n" +
+            "    URI uri = new URI(\"abfss://test@manishtestfnsnew.dfs.core.windows.net\");\n" +
+            "    Configuration config = new Configuration();\n" +
+            "\n" +
+            "    AzureBlobFileSystem fs = (AzureBlobFileSystem) FileSystem.newInstance(uri, config);\n" +
+            "    AbfsClient client = spyClient(Mockito.spy(fs), calls1);\n" +
+            "\n" +
+            "    mgr.registerClient(\"acc1\", client);\n" +
+            "    mgr.recordMetric(\"acc1\", \"m1\");\n" +
+            "    mgr.recordMetric(\"acc1\", \"m2\");\n" +
+            "\n" +
+            "    System.out.println(\"BEFORE_EXIT\");\n" +
+            "    System.exit(0);\n" +
+            "  }\n" +
+            "\n" +
+            "  private static AbfsClient spyClient(AzureBlobFileSystem azureBlobFileSystem,\n" +
+            "      AtomicInteger counter) throws IOException {\n" +
+            "\n" +
+            "    AzureBlobFileSystemStore store = Mockito.spy(azureBlobFileSystem.getAbfsStore());\n" +
+            "
Mockito.doReturn(store).when(azureBlobFileSystem).getAbfsStore();\n" + + "\n" + + " AbfsClient client = Mockito.spy(store.getClient());\n" + + " Mockito.doReturn(client).when(store).getClient();\n" + + "\n" + + " Mockito.doAnswer(inv -> {\n" + + " counter.incrementAndGet();\n" + + " System.out.println(\"FLUSH:\" + inv.getArguments()[0]);\n" + + " return null;\n" + + " }).when(client).getMetricCall(Mockito.any());\n" + + "\n" + + " return client;\n" + + " }\n" + + "}\n"; + + runProgramAndCaptureOutput(program, true, 0); + } + + /** + * Tests that the shutdown hook flushes metrics on JVM exit after multiple + * clients and deregistrations. + */ + @Test + public void testAggregatedMetricsManagerWithJVMExit1() + throws IOException, InterruptedException { + // ------------------------------- + // Program 2 (final, multi-FS version) + // ------------------------------- + String program = + "import org.apache.hadoop.fs.azurebfs.services.*;\n" + + "import org.apache.hadoop.fs.azurebfs.utils.*;\n" + + "import org.apache.hadoop.conf.Configuration;\n" + + "import org.apache.hadoop.fs.FileSystem;\n" + + "import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem;\n" + + "import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystemStore;\n" + + "import java.util.*;\n" + + "import java.util.concurrent.atomic.AtomicInteger;\n" + + "import java.io.IOException;\n" + + "import java.net.URI;\n" + + "import org.mockito.Mockito;\n" + + "\n" + + "public class ShutdownTestProg {\n" + + " public static void main(String[] args) throws Exception {\n" + + " AggregateMetricsManager mgr = AggregateMetricsManager.get(10, 3);\n" + + "\n" + + " AtomicInteger calls1 = new AtomicInteger();\n" + + " AtomicInteger calls2 = new AtomicInteger();\n" + + " AtomicInteger calls3 = new AtomicInteger();\n" + + "\n" + + " URI uri = new URI(\"abfss://test@manishtestfnsnew.dfs.core.windows.net\");\n" + + " Configuration config = new Configuration();\n" + + "\n" + + " AzureBlobFileSystem fs1 = (AzureBlobFileSystem) FileSystem.newInstance(uri, config);\n" + + " AzureBlobFileSystem fs2 = (AzureBlobFileSystem) FileSystem.newInstance(uri, config);\n" + + " AzureBlobFileSystem fs3 = (AzureBlobFileSystem) FileSystem.newInstance(uri, config);\n" + + "\n" + + " AbfsClient client1 = spyClient(Mockito.spy(fs1), calls1);\n" + + " AbfsClient client2 = spyClient(Mockito.spy(fs2), calls2);\n" + + " AbfsClient client3 = spyClient(Mockito.spy(fs3), calls3);\n" + + "\n" + + " mgr.registerClient(\"acc1\", client1);\n" + + " mgr.registerClient(\"acc1\", client2);\n" + + " mgr.registerClient(\"acc1\", client3);\n" + + "\n" + + " mgr.recordMetric(\"acc1\", \"m1\");\n" + + " mgr.recordMetric(\"acc1\", \"m2\");\n" + + "\n" + + " mgr.recordMetric(\"acc1\", \"m3\");\n" + + " mgr.recordMetric(\"acc1\", \"m4\");\n" + + "\n" + + " mgr.recordMetric(\"acc1\", \"m5\");\n" + + " mgr.recordMetric(\"acc1\", \"m6\");\n" + + "\n" + + " System.out.println(\"BEFORE_EXIT\");\n" + + " mgr.deregisterClient(\"acc1\", client3);\n" + + " mgr.deregisterClient(\"acc1\", client2);\n" + + " mgr.deregisterClient(\"acc1\", client1);\n" + + " System.out.println(\"BEFORE_EXIT1\");\n" + + " System.exit(1);\n" + + " }\n" + + "\n" + + " private static AbfsClient spyClient(AzureBlobFileSystem azureBlobFileSystem,\n" + + " AtomicInteger counter) throws IOException {\n" + + "\n" + + " AzureBlobFileSystemStore store = Mockito.spy(azureBlobFileSystem.getAbfsStore());\n" + + " Mockito.doReturn(store).when(azureBlobFileSystem).getAbfsStore();\n" + + "\n" + + " AbfsClient client = 
Mockito.spy(store.getClient());\n" + + " Mockito.doReturn(client).when(store).getClient();\n" + + "\n" + + " Mockito.doAnswer(inv -> {\n" + + " counter.incrementAndGet();\n" + + " System.out.println(\"FLUSH:\" + inv.getArguments()[0]);\n" + + " return null;\n" + + " }).when(client).getMetricCall(Mockito.any());\n" + + "\n" + + " return client;\n" + + " }\n" + + "}\n"; + + runProgramAndCaptureOutput(program, true, 1); + } + + /** + * Tests that the shutdown hook does not flush metrics on JVM crash. + */ + @Test + void testAggregatedMetricsManagerWithJVMCrash() throws Exception { + // ------------------------------- + // Program 3 (JVM Crash) + // ------------------------------- + String program = + "import org.apache.hadoop.fs.azurebfs.services.*;\n" + + "import org.apache.hadoop.fs.azurebfs.utils.*;\n" + + "import org.apache.hadoop.conf.Configuration;\n" + + "import org.apache.hadoop.fs.FileSystem;\n" + + "import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem;\n" + + "import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystemStore;\n" + + "import java.util.*;\n" + + "import java.util.concurrent.atomic.AtomicInteger;\n" + + "import java.io.IOException;\n" + + "import java.net.URI;\n" + + "import java.lang.reflect.*;\n" + + "import org.mockito.Mockito;\n" + + "\n" + + "public class ShutdownTestProg {\n" + + "\n" + + " public static void main(String[] args) throws Exception {\n" + + " AggregateMetricsManager mgr = AggregateMetricsManager.get(10, 3);\n" + + "\n" + + " // Track how many times metrics flush\n" + + " AtomicInteger calls1 = new AtomicInteger();\n" + + " AtomicInteger calls2 = new AtomicInteger();\n" + + " AtomicInteger calls3 = new AtomicInteger();\n" + + "\n" + + " URI uri = new URI(\"abfss://test@manishtestfnsnew.dfs.core.windows.net\");\n" + + " Configuration config = new Configuration();\n" + + "\n" + + " // Initialize 3 separate file system instances\n" + + " AzureBlobFileSystem fs1 = (AzureBlobFileSystem) FileSystem.newInstance(uri, config);\n" + + " AzureBlobFileSystem fs2 = (AzureBlobFileSystem) FileSystem.newInstance(uri, config);\n" + + " AzureBlobFileSystem fs3 = (AzureBlobFileSystem) FileSystem.newInstance(uri, config);\n" + + "\n" + + " // Create 3 spy clients\n" + + " AbfsClient client1 = spyClient(Mockito.spy(fs1), calls1);\n" + + " AbfsClient client2 = spyClient(Mockito.spy(fs2), calls2);\n" + + " AbfsClient client3 = spyClient(Mockito.spy(fs3), calls3);\n" + + "\n" + + " mgr.registerClient(\"acc1\", client1);\n" + + " mgr.registerClient(\"acc2\", client2);\n" + + " mgr.registerClient(\"acc3\", client3);\n" + + "\n" + + " // Produce metrics on all clients\n" + + " mgr.recordMetric(\"acc1\", \"m1\");\n" + + " mgr.recordMetric(\"acc1\", \"m2\");\n" + + " mgr.recordMetric(\"acc2\", \"m3\");\n" + + " mgr.recordMetric(\"acc2\", \"m4\");\n" + + " mgr.recordMetric(\"acc3\", \"m5\");\n" + + " mgr.recordMetric(\"acc3\", \"m6\");\n" + + "\n" + + " System.out.println(\"BEFORE_EXIT\");\n" + + " crashJvm();\n" + + " }\n" + + "\n" + + " private static void crashJvm() throws Exception {\n" + + " Field f = sun.misc.Unsafe.class.getDeclaredField(\"theUnsafe\");\n" + + " f.setAccessible(true);\n" + + " sun.misc.Unsafe unsafe = (sun.misc.Unsafe) f.get(null);\n" + + " unsafe.putAddress(0, 0); // SIGSEGV → Immediate JVM crash\n" + // 128 + 6 = 134 (exitcode for SIGABRT) + + " }\n" + + "\n" + + " private static AbfsClient spyClient(AzureBlobFileSystem azureBlobFileSystem, AtomicInteger counter) throws IOException {\n" + + " AzureBlobFileSystemStore store = 
Mockito.spy(azureBlobFileSystem.getAbfsStore());\n" +
+            "    Mockito.doReturn(store).when(azureBlobFileSystem).getAbfsStore();\n" +
+            "    AbfsClient client = Mockito.spy(store.getClient());\n" +
+            "    Mockito.doReturn(client).when(store).getClient();\n" +
+            "\n" +
+            "    Mockito.doAnswer(inv -> {\n" +
+            "      counter.incrementAndGet();\n" +
+            "      System.out.println(\"FLUSH:\" + inv.getArguments()[0]);\n" +
+            "      return null;\n" +
+            "    }).when(client).getMetricCall(Mockito.any());\n" +
+            "\n" +
+            "    return client;\n" +
+            "  }\n" +
+            "}\n";
+
+    runProgramAndCaptureOutput(program, false, 134);
+  }
+
+  /**
+   * Compiles and runs a Java program in a separate JVM, capturing its output.
+   *
+   * @param program The Java program source code as a string.
+   * @param expectMetricsFlush Whether to expect metrics flush output.
+   * @param expectedExitCode The exit code the child JVM is expected to return.
+   * @throws IOException If an I/O error occurs.
+   * @throws InterruptedException If the thread is interrupted while waiting.
+   */
+  private void runProgramAndCaptureOutput(String program,
+      boolean expectMetricsFlush, int expectedExitCode)
+      throws IOException, InterruptedException {
+    Path tempFile = Files.createTempFile("ShutdownTestProg", ".java");
+    Files.write(tempFile, program.getBytes(StandardCharsets.UTF_8));
+
+    Path javaFile = tempFile.getParent().resolve("ShutdownTestProg.java");
+    Files.move(tempFile, javaFile, StandardCopyOption.REPLACE_EXISTING);
+
+    Process javac = new ProcessBuilder(
+        "javac",
+        "-classpath", System.getProperty("java.class.path"),
+        javaFile.toAbsolutePath().toString())
+        .redirectErrorStream(true)
+        .start();
+
+    String compileOutput = readProcessOutput(javac);
+    javac.waitFor();
+
+    Assertions.assertThat(javac.exitValue())
+        .withFailMessage("Compilation failed:\n" + compileOutput)
+        .isEqualTo(0);
+
+    String classpath = javaFile.getParent().toAbsolutePath()
+        + File.pathSeparator
+        + System.getProperty("java.class.path");
+
+    Process javaProc = new ProcessBuilder("java",
+        "-XX:ErrorFile=/tmp/no_hs_err_%p.log",
+        "-classpath", classpath,
+        "ShutdownTestProg")
+        .redirectErrorStream(true)
+        .start();
+
+    String output = readProcessOutput(javaProc);
+    int exitCode = javaProc.waitFor();
+
+    Assertions.assertThat(output).contains("BEFORE_EXIT");
+    Assertions.assertThat(exitCode).isEqualTo(expectedExitCode);
+
+    if (expectMetricsFlush) {
+      Assertions.assertThat(output).contains("FLUSH:");
+    } else {
+      Assertions.assertThat(output).doesNotContain("FLUSH:");
+    }
+  }
+
+  /**
+   * Reads all output from a process's input stream.
+   *
+   * @param proc The process to read from.
+   * @return The output as a string.
+   * @throws IOException If an I/O error occurs.
+   * @throws InterruptedException If the thread is interrupted while waiting.
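+   *
+   * The stream is drained on a separate thread so that a child process
+   * producing a lot of output cannot block on a full stdout pipe before
+   * exiting.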
+   */
+  private static String readProcessOutput(Process proc)
+      throws IOException, InterruptedException {
+    ByteArrayOutputStream out = new ByteArrayOutputStream();
+    Thread t = new Thread(() -> {
+      try (InputStream in = proc.getInputStream()) {
+        byte[] buf = new byte[4096];
+        int n;
+        while ((n = in.read(buf)) != -1) {
+          out.write(buf, 0, n);
+        }
+      } catch (IOException e) {
+        e.printStackTrace();
+      }
+    });
+    t.start();
+    int exitCode = proc.waitFor(); // wait for process to exit
+    t.join(); // wait for reading thread to finish
+    return out.toString(StandardCharsets.UTF_8.name());
+  }
+
+  /**
+   * Helper method to create a spied client that records a timestamp on
+   * getMetricCall.
+   */
+  private AbfsClient createSpiedClient(AtomicLong time) throws IOException {
+    AzureBlobFileSystem fs = Mockito.spy(this.getFileSystem());
+    AzureBlobFileSystemStore store = Mockito.spy(fs.getAbfsStore());
+    Mockito.doReturn(store).when(fs).getAbfsStore();
+
+    AbfsClient client = Mockito.spy(store.getClient());
+    Mockito.doReturn(client).when(store).getClient();
+
+    Mockito.doAnswer(inv -> {
+      time.set(System.nanoTime());
+      return inv.callRealMethod();
+    }).when(client).getMetricCall(Mockito.any());
+
+    return client;
+  }
+
+}
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestApacheHttpClientFallback.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestApacheHttpClientFallback.java
index 55c5f76c71b52..42b27a0111a3b 100644
--- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestApacheHttpClientFallback.java
+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestApacheHttpClientFallback.java
@@ -177,7 +177,8 @@ private AbfsRestOperation getMockRestOperation(int[] retryIteration)
 
     Mockito.doNothing()
         .when(op)
-        .signRequest(Mockito.any(AbfsHttpOperation.class), Mockito.anyInt());
+        .signRequest(Mockito.any(AbfsHttpOperation.class), Mockito.anyInt(),
+            Mockito.anyBoolean());
 
     Mockito.doAnswer(answer -> {
       AbfsHttpOperation operation = Mockito.spy(
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/TestSimpleRateLimiter.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/TestSimpleRateLimiter.java
new file mode 100644
index 0000000000000..b87e1860179ab
--- /dev/null
+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/TestSimpleRateLimiter.java
@@ -0,0 +1,144 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.hadoop.fs.azurebfs.utils; + +import java.util.concurrent.locks.LockSupport; + +import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.Test; + +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidConfigurationValueException; + +public class TestSimpleRateLimiter { + /** + * Verifies that the rate limiter does not introduce unnecessary blocking + * when calls are naturally spaced apart longer than the required interval. + * + * The test creates a limiter allowing 2 permits per second (500ms + * interval). After calling {@code acquire()}, it waits 600ms—longer than + * required—so the next {@code acquire()} should return immediately. + */ + @Test + void testNoWaitWhenSpacedOut() throws InvalidConfigurationValueException { + // 2 permits per second → 500 ms interval + SimpleRateLimiter limiter = new SimpleRateLimiter(2); + + limiter.acquire(); + // Sleep longer than required interval + LockSupport.parkNanos(600_000_000L); // 600 ms + + long before = System.nanoTime(); + limiter.acquire(); // Should not block + long after = System.nanoTime(); + + long elapsed = after - before; + + // Should be less than 5ms + Assertions.assertThat(elapsed < 5_000_000L) + .describedAs("acquire() should not block when enough time has passed") + .isTrue(); + } + + /** + * Verifies that the rate limiter enforces the correct delay when + * {@code acquire()} is called faster than the configured rate. + * + * At 5 permits per second (200ms interval), two immediate consecutive + * calls should cause the second call to block for roughly 200ms. + */ + @Test + void testRateLimitingDelay() throws InvalidConfigurationValueException { + // 5 permits per second → 200ms interval + SimpleRateLimiter limiter = new SimpleRateLimiter(5); + + limiter.acquire(); // First call never waits + + long before = System.nanoTime(); + limiter.acquire(); // Second call immediately → should wait ~200ms + long after = System.nanoTime(); + + long elapsedMs = (after - before) / 1_000_000; + + // Expect ~200ms, so allow tolerance + Assertions.assertThat(elapsedMs >= 180 && elapsedMs <= 260) + .describedAs("Expected about 200ms wait, but was " + elapsedMs + " ms") + .isTrue(); + } + + /** + * Tests that multiple rapid calls produce cumulative waiting consistent + * with the configured permit interval. + * + * At 10 permits per second (100ms interval), five immediate calls should + * take around 400ms total (the first call is free; the remaining four + * require spacing). + */ + @Test + void testMultipleBurstCalls() throws InvalidConfigurationValueException { + // 10 permits per second → 100ms interval + SimpleRateLimiter limiter = new SimpleRateLimiter(10); + + long totalStart = System.nanoTime(); + + for (int i = 0; i < 5; i++) { + limiter.acquire(); + } + + long totalMs = (System.nanoTime() - totalStart) / 1_000_000; + + // 5 calls → should take around 400ms (first is free, next 4 need +100ms each) + Assertions.assertThat(totalMs >= 350 && totalMs <= 550) + .describedAs("Expected around 400ms total but got " + totalMs + "ms") + .isTrue(); + } + + /** + * Verifies that when 10 rapid acquire() calls are made with a rate limit + * of 3 permits per second (≈333ms interval), the total execution time is + * spread across ~3 seconds, since each call must be spaced by the interval. + * + * Expected timing: + * interval = 333ms + * first call: no wait + * remaining 9 calls must wait: 9 × 333ms ≈ 2997ms + * + * Total expected time: around 3.0 seconds. 
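+   * The wide assertion band used below (2700ms to 3500ms) absorbs scheduler
+   * jitter on loaded test hosts.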
+ */ + @Test + void testMultipleBurstCallsWhenPermitIsLess() + throws InvalidConfigurationValueException { + int permitsPerSecond = 3; + SimpleRateLimiter limiter = new SimpleRateLimiter(permitsPerSecond); + + long start = System.nanoTime(); + + for (int i = 0; i < 10; i++) { + limiter.acquire(); + } + + long end = System.nanoTime(); + long elapsedMs = (end - start) / 1_000_000; + + // Expected ~3000ms, allow tolerance due to scheduler delays. + Assertions.assertThat(elapsedMs >= 2700 && elapsedMs <= 3500) + .describedAs("Expected ~3000ms, but got " + elapsedMs + "ms") + .isTrue(); + } +} + From abf1f1e90be6961e80bc486a2fedd3c763ceba59 Mon Sep 17 00:00:00 2001 From: bhattmanish98 Date: Tue, 16 Dec 2025 03:25:03 -0800 Subject: [PATCH 02/10] Removed unwanted changes --- hadoop-tools/hadoop-azure/pom.xml | 1 - .../hadoop/fs/azurebfs/AbfsConfiguration.java | 2 -- .../constants/FileSystemConfigurations.java | 2 +- .../fs/azurebfs/services/KeepAliveCache.java | 5 ----- .../fs/azurebfs/services/MetricsBucket.java | 18 ++++++++++++++++++ .../utils/ResourceUtilizationUtils.java | 3 +-- .../fs/azurebfs/utils/TracingContext.java | 2 -- .../azurebfs/utils/TracingHeaderVersion.java | 2 +- 8 files changed, 21 insertions(+), 14 deletions(-) diff --git a/hadoop-tools/hadoop-azure/pom.xml b/hadoop-tools/hadoop-azure/pom.xml index 4234128787e8d..d0bfc6852befe 100644 --- a/hadoop-tools/hadoop-azure/pom.xml +++ b/hadoop-tools/hadoop-azure/pom.xml @@ -29,7 +29,6 @@ Currently this consists of a filesystem client to read data from and write data to Azure Storage. - 3.5.0-METRICS jar diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java index 1c01cdce3edd5..debd6a0af815a 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java @@ -79,9 +79,7 @@ import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.DOT; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EMPTY_STRING; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.*; -import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRIC_EMIT_INTERVAL_MINS; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.*; -import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DEFAULT_METRIC_EMIT_INTERVAL_MINS; import static org.apache.hadoop.fs.azurebfs.services.AbfsErrors.INCORRECT_INGRESS_TYPE; /** diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java index a8eba59e0f521..787e2ca02cb3d 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java @@ -140,7 +140,7 @@ public final class FileSystemConfigurations { public static final boolean DEFAULT_SHOULD_EMIT_METRICS_ON_IDLE_TIME = false; public static final long DEFAULT_METRIC_EMIT_THRESHOLD = 100_000L; public static final long DEFAULT_METRICS_EMIT_THRESHOLD_INTERVAL_SECS = 60; - public static final long DEFAULT_METRIC_EMIT_INTERVAL_MINS = 2 * 60; + 
public static final long DEFAULT_METRIC_EMIT_INTERVAL_MINS = 60; public static final int DEFAULT_MAX_METRICS_CALLS_PER_SECOND = 3; public static final boolean DEFAULT_FS_AZURE_ACCOUNT_LEVEL_THROTTLING_ENABLED = true; public static final int DEFAULT_ACCOUNT_OPERATION_IDLE_TIMEOUT_MS = 60_000; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/KeepAliveCache.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/KeepAliveCache.java index 09dadc29ea979..1ad1ba899dfd6 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/KeepAliveCache.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/KeepAliveCache.java @@ -32,7 +32,6 @@ import org.apache.hadoop.fs.ClosedIOException; import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; import org.apache.http.HttpClientConnection; -import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.KEEP_ALIVE_CACHE_CLOSED; @@ -124,10 +123,6 @@ class KeepAliveCache extends LinkedBlockingDeque return thread; }); } - - PoolingHttpClientConnectionManager poolingHttpClientConnectionManager = new PoolingHttpClientConnectionManager(); - poolingHttpClientConnectionManager.setMaxTotal(maxCacheConnections); - poolingHttpClientConnectionManager.setDefaultMaxPerRoute(maxCacheConnections); } /** diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/MetricsBucket.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/MetricsBucket.java index ad93dcd4b658a..c3b555161264b 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/MetricsBucket.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/MetricsBucket.java @@ -1,3 +1,21 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + package org.apache.hadoop.fs.azurebfs.services; import java.io.IOException; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/ResourceUtilizationUtils.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/ResourceUtilizationUtils.java index cf47eff782d6c..c151a483b1853 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/ResourceUtilizationUtils.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/ResourceUtilizationUtils.java @@ -149,8 +149,7 @@ public static long getMaxHeapMemory() { * @return the PID of the current JVM process */ public static long getJvmProcessId() { - String jvmName = ManagementFactory.getRuntimeMXBean().getName(); - return Long.parseLong(jvmName.split("@")[0]); + return ProcessHandle.current().pid(); } /** diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingContext.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingContext.java index ca00a888c9d1d..265d72314471b 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingContext.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingContext.java @@ -70,7 +70,6 @@ public class TracingContext { private String position = EMPTY_STRING; // position of read/write in remote file private String metricResults = EMPTY_STRING; private ReadType readType = ReadType.UNKNOWN_READ; - private boolean isMetricCall = false; private String resourceUtilizationMetricResults = EMPTY_STRING; /** @@ -155,7 +154,6 @@ public TracingContext(TracingContext originalTracingContext) { } this.metricResults = originalTracingContext.metricResults; this.readType = originalTracingContext.readType; - this.isMetricCall = originalTracingContext.isMetricCall; this.resourceUtilizationMetricResults = originalTracingContext.resourceUtilizationMetricResults; } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingHeaderVersion.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingHeaderVersion.java index 7f6ee60f0a1ef..dc05e8f251ee5 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingHeaderVersion.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingHeaderVersion.java @@ -51,7 +51,7 @@ public enum TracingHeaderVersion { /** * Metrics to client request id header. 
*/ - AV0("av0", 15); + AV0("av0", 3); private final String versionString; private final int fieldCount; From 9111f5eb06a6d5d0e5c77aa1c5d397d0f912b4cf Mon Sep 17 00:00:00 2001 From: bhattmanish98 Date: Tue, 16 Dec 2025 06:35:35 -0800 Subject: [PATCH 03/10] Fixed Checkstyle --- .../fs/azurebfs/services/AbfsClient.java | 6 +-- .../services/AggregateMetricsManager.java | 10 ++--- .../fs/azurebfs/utils/SimpleRateLimiter.java | 5 ++- .../services/TestAggregateMetricsManager.java | 30 +++++++++------ .../azurebfs/utils/TestSimpleRateLimiter.java | 37 +++++++++++++------ 5 files changed, 56 insertions(+), 32 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java index bc0763f802ee6..63f7d4c44c0db 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java @@ -325,9 +325,9 @@ private AbfsClient(final URL baseUrl, metricAccountName.substring(0, dotIndex), metricAccountKey); hasSeparateMetricAccount = true; - setMetricsUrl(metricAccountName.startsWith(HTTPS_SCHEME) ? - metricAccountName : HTTPS_SCHEME + COLON + - FORWARD_SLASH + FORWARD_SLASH + metricAccountName); + setMetricsUrl(metricAccountName.startsWith(HTTPS_SCHEME) + ? metricAccountName : HTTPS_SCHEME + COLON + + FORWARD_SLASH + FORWARD_SLASH + metricAccountName); } catch (IllegalArgumentException e) { throw new IOException( "Exception while initializing metric credentials ", e); diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AggregateMetricsManager.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AggregateMetricsManager.java index 28d27ae5922e3..a45531a00239b 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AggregateMetricsManager.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AggregateMetricsManager.java @@ -36,7 +36,7 @@ public final class AggregateMetricsManager { /** * Singleton instance of AggregateMetricsManager. */ - private static volatile AggregateMetricsManager INSTANCE; + private static volatile AggregateMetricsManager instance; // Map of account name to MetricsBucket. 
private final ConcurrentHashMap buckets = @@ -94,23 +94,21 @@ private AggregateMetricsManager(final long dispatchIntervalInMins, */ public static AggregateMetricsManager get(final long dispatchIntervalInMins, final int permitsPerSecond) { - - AggregateMetricsManager instance = INSTANCE; if (instance != null) { return instance; } synchronized (AggregateMetricsManager.class) { - if (INSTANCE == null) { + if (instance == null) { try { - INSTANCE = new AggregateMetricsManager( + instance = new AggregateMetricsManager( dispatchIntervalInMins, permitsPerSecond); } catch (InvalidConfigurationValueException e) { throw new RuntimeException( "Failed to initialize AggregateMetricsManager", e); } } - return INSTANCE; + return instance; } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/SimpleRateLimiter.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/SimpleRateLimiter.java index f12dc78944eff..eadd632b80dab 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/SimpleRateLimiter.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/SimpleRateLimiter.java @@ -31,6 +31,9 @@ public class SimpleRateLimiter { /** The next allowed time (in nanoseconds) when a permit may be issued. */ private final AtomicLong nextAllowedTime = new AtomicLong(0); + /** Number of nanoseconds in one second. */ + private static final long NANOS_PER_SECOND = 1_000_000_000L; + /** * Creates a rate limiter with a fixed number of permits allowed per second. * @@ -44,7 +47,7 @@ public SimpleRateLimiter(int permitsPerSecond) throw new InvalidConfigurationValueException( "Aggregated Metrics Per Second Call"); } - this.intervalNanos = 1_000_000_000L / permitsPerSecond; + this.intervalNanos = NANOS_PER_SECOND / permitsPerSecond; } /** diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAggregateMetricsManager.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAggregateMetricsManager.java index 538ba54f5d0ac..16503f5da7384 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAggregateMetricsManager.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAggregateMetricsManager.java @@ -46,6 +46,9 @@ public class TestAggregateMetricsManager extends AbstractAbfsIntegrationTest { + /** Number of nanoseconds in one millisecond. 
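The get() rewrite above is the classic double-checked locking idiom: an unsynchronized read on the fast path, then a second check under the class lock before constructing. The volatile modifier on the field is what makes the unsynchronized read safe. A generic sketch of the pattern (class name is mine):

public final class LazySingleton {

  // volatile guarantees a reference published by one thread is observed
  // fully constructed by all others; without it the fast path is unsafe.
  private static volatile LazySingleton instance;

  private LazySingleton() {
  }

  public static LazySingleton get() {
    LazySingleton local = instance;   // one volatile read on the fast path
    if (local != null) {
      return local;
    }
    synchronized (LazySingleton.class) {
      if (instance == null) {         // re-check under the lock
        instance = new LazySingleton();
      }
      return instance;
    }
  }
}

The patched get() reads the field directly instead of caching it in a local, which costs an extra volatile read but is equally correct, since the field is never reset to null once published.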
*/ + private static final long NANOS_PER_MILLISECOND = 1_000_000L; + // The manager under test private AggregateMetricsManager manager; @@ -269,6 +272,9 @@ public void testDeregisterNonexistentClient() throws IOException { @Test public void testMultipleMetricCallsInCaseDataIsMoreThanBufferSize() throws Exception { + final int metricsDataSize1 = 927; // size of aggregated data for first 3 calls + final int metricsDataSize2 = 115; // size of aggregated data for last call + final int numberOfMetrics = 25; // total metrics to send AtomicInteger calls = new AtomicInteger(0); AzureBlobFileSystem azureBlobFileSystem = Mockito.spy(this.getFileSystem()); AzureBlobFileSystemStore store = Mockito.spy( @@ -282,17 +288,17 @@ public void testMultipleMetricCallsInCaseDataIsMoreThanBufferSize() if (calls.get() < 3) { // first three calls, data size will be 927 chars Assertions.assertThat(data.length()) .describedAs("Aggregated metric data size should be 927 chars") - .isEqualTo(927); + .isEqualTo(metricsDataSize1); } else { // last call, data size will be 115 chars Assertions.assertThat(data.length()) .describedAs("Aggregated metric data size should be 115 chars") - .isEqualTo(115); + .isEqualTo(metricsDataSize2); } calls.incrementAndGet(); return inv.callRealMethod(); }).when(client).getMetricCall(Mockito.any()); manager.registerClient("acc1", client); - for (int i = 0; i < 25; i++) { + for (int i = 0; i < numberOfMetrics; i++) { manager.recordMetric("acc1", getMetricsData() + "$OT=163$RT=6.024%$TRNR=2543$TR=2706"); // each data is 113 chars } @@ -314,10 +320,10 @@ public void testMultipleMetricCallsInCaseDataIsMoreThanBufferSize() */ @Test public void testRateLimitMetricCalls() throws IOException, InterruptedException { - int permitsPerSecond = 3; - long minIntervalMs = 1_000 / permitsPerSecond; // 333ms - double toleranceMs = 15.0; // allow 15ms jitter - int numClients = 10; + final int permitsPerSecond = 3; + final long minIntervalMs = 1_000 / permitsPerSecond; // 333ms + final double toleranceMs = 15; // allow 15ms jitter + final int numClients = 10; // Store timestamps for each client final List times = new ArrayList<>(); @@ -356,10 +362,10 @@ public void testRateLimitMetricCalls() throws IOException, InterruptedException for (int i = 0; i < times.size(); i++) { for (int j = i + 1; j < times.size(); j++) { double diffMs = Math.abs(times.get(i).get() - times.get(j).get()) - / 1_000_000.0; + / (double) NANOS_PER_MILLISECOND; Assertions.assertThat(diffMs) .describedAs( - "Expected at least %.3f ms (tolerance %.3f) between metric sends", + "Expected at least %d ms (tolerance %.3f) between metric sends", minIntervalMs, toleranceMs) .isGreaterThanOrEqualTo(minIntervalMs - toleranceMs); } @@ -520,6 +526,7 @@ public void testAggregatedMetricsManagerWithJVMExit1() */ @Test void testAggregatedMetricsManagerWithJVMCrash() throws Exception { + final int crashExitCode = 134; // ------------------------------- // Program 3 (JVM Crash) // ------------------------------- @@ -600,7 +607,7 @@ void testAggregatedMetricsManagerWithJVMCrash() throws Exception { + " }\n" + "}\n"; - runProgramAndCaptureOutput(program, false, 134); + runProgramAndCaptureOutput(program, false, crashExitCode); } /** @@ -668,10 +675,11 @@ private void runProgramAndCaptureOutput(String program, */ private static String readProcessOutput(Process proc) throws IOException, InterruptedException { + final int maxBufferSize = 4096; ByteArrayOutputStream out = new ByteArrayOutputStream(); Thread t = new Thread(() -> { try (InputStream in = 
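testRateLimitMetricCalls above records one nanosecond timestamp per dispatched metric call and then compares every pair, not just adjacent ones, so reordering cannot hide a rate violation; the tolerance absorbs scheduler jitter. A condensed standalone sketch of that assertion (class and method names are mine):

final class SpacingCheck {
  private SpacingCheck() { }

  // Fail unless every pair of timestamps (nanoseconds) is at least the
  // rate-limit interval apart, minus a small jitter tolerance.
  static void assertMinSpacing(long[] nanoTimes, int permitsPerSecond,
      double toleranceMs) {
    double minIntervalMs = 1_000.0 / permitsPerSecond;  // 3/s -> ~333 ms
    for (int i = 0; i < nanoTimes.length; i++) {
      for (int j = i + 1; j < nanoTimes.length; j++) {
        double diffMs = Math.abs(nanoTimes[i] - nanoTimes[j]) / 1_000_000.0;
        if (diffMs < minIntervalMs - toleranceMs) {
          throw new AssertionError(
              "sends only " + diffMs + " ms apart, expected >= "
                  + (minIntervalMs - toleranceMs));
        }
      }
    }
  }
}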
proc.getInputStream()) { - byte[] buf = new byte[4096]; + byte[] buf = new byte[maxBufferSize]; int n; while ((n = in.read(buf)) != -1) { out.write(buf, 0, n); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/TestSimpleRateLimiter.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/TestSimpleRateLimiter.java index b87e1860179ab..1d736d4a37535 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/TestSimpleRateLimiter.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/TestSimpleRateLimiter.java @@ -26,6 +26,10 @@ import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidConfigurationValueException; public class TestSimpleRateLimiter { + + /** Number of nanoseconds in one millisecond. */ + private static final long NANOS_PER_MILLISECOND = 1_000_000L; + /** * Verifies that the rate limiter does not introduce unnecessary blocking * when calls are naturally spaced apart longer than the required interval. @@ -41,7 +45,7 @@ void testNoWaitWhenSpacedOut() throws InvalidConfigurationValueException { limiter.acquire(); // Sleep longer than required interval - LockSupport.parkNanos(600_000_000L); // 600 ms + LockSupport.parkNanos(600 * NANOS_PER_MILLISECOND); // 600 ms long before = System.nanoTime(); limiter.acquire(); // Should not block @@ -50,7 +54,7 @@ void testNoWaitWhenSpacedOut() throws InvalidConfigurationValueException { long elapsed = after - before; // Should be less than 5ms - Assertions.assertThat(elapsed < 5_000_000L) + Assertions.assertThat(elapsed < 5 * NANOS_PER_MILLISECOND) .describedAs("acquire() should not block when enough time has passed") .isTrue(); } @@ -64,8 +68,11 @@ void testNoWaitWhenSpacedOut() throws InvalidConfigurationValueException { */ @Test void testRateLimitingDelay() throws InvalidConfigurationValueException { + final int permitsPerSecond = 5; + final long minTimeAllowed = 180; + final long maxTimeAllowed = 260; // 5 permits per second → 200ms interval - SimpleRateLimiter limiter = new SimpleRateLimiter(5); + SimpleRateLimiter limiter = new SimpleRateLimiter(permitsPerSecond); limiter.acquire(); // First call never waits @@ -73,10 +80,11 @@ void testRateLimitingDelay() throws InvalidConfigurationValueException { limiter.acquire(); // Second call immediately → should wait ~200ms long after = System.nanoTime(); - long elapsedMs = (after - before) / 1_000_000; + long elapsedMs = (after - before) / NANOS_PER_MILLISECOND; // Expect ~200ms, so allow tolerance - Assertions.assertThat(elapsedMs >= 180 && elapsedMs <= 260) + Assertions.assertThat( + elapsedMs >= minTimeAllowed && elapsedMs <= maxTimeAllowed) .describedAs("Expected about 200ms wait, but was " + elapsedMs + " ms") .isTrue(); } @@ -91,8 +99,11 @@ void testRateLimitingDelay() throws InvalidConfigurationValueException { */ @Test void testMultipleBurstCalls() throws InvalidConfigurationValueException { + final int permitsPerSecond = 10; + final long minTimeAllowed = 350; + final long maxTimeAllowed = 550; // 10 permits per second → 100ms interval - SimpleRateLimiter limiter = new SimpleRateLimiter(10); + SimpleRateLimiter limiter = new SimpleRateLimiter(permitsPerSecond); long totalStart = System.nanoTime(); @@ -100,10 +111,11 @@ void testMultipleBurstCalls() throws InvalidConfigurationValueException { limiter.acquire(); } - long totalMs = (System.nanoTime() - totalStart) / 1_000_000; + long totalMs = (System.nanoTime() - totalStart) / NANOS_PER_MILLISECOND; // 5 
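readProcessOutput above drains the child process's stdout on a dedicated thread. That is not cosmetic: a child JVM that writes more than the OS pipe buffer (typically 64 KB on Linux) blocks forever if the parent only calls waitFor() without reading the stream. A self-contained sketch of the same pattern (class name is mine):

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;

final class ProcessCapture {
  private ProcessCapture() { }

  static String runAndCapture(ProcessBuilder pb)
      throws IOException, InterruptedException {
    pb.redirectErrorStream(true);        // fold stderr into stdout
    Process proc = pb.start();
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    Thread drainer = new Thread(() -> {
      try (InputStream in = proc.getInputStream()) {
        byte[] buf = new byte[4096];
        int n;
        while ((n = in.read(buf)) != -1) {
          out.write(buf, 0, n);
        }
      } catch (IOException ignored) {
        // the child exited and closed the pipe; nothing left to drain
      }
    });
    drainer.start();
    int exitCode = proc.waitFor();       // cannot deadlock: drainer keeps reading
    drainer.join();
    return "exit=" + exitCode + "\n" + out;
  }
}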
calls → should take around 400ms (first is free, next 4 need +100ms each) - Assertions.assertThat(totalMs >= 350 && totalMs <= 550) + Assertions.assertThat( + totalMs >= minTimeAllowed && totalMs <= maxTimeAllowed) .describedAs("Expected around 400ms total but got " + totalMs + "ms") .isTrue(); } @@ -123,7 +135,9 @@ void testMultipleBurstCalls() throws InvalidConfigurationValueException { @Test void testMultipleBurstCallsWhenPermitIsLess() throws InvalidConfigurationValueException { - int permitsPerSecond = 3; + final int permitsPerSecond = 3; + final long minTimeAllowed = 2700; + final long maxTimeAllowed = 3500; SimpleRateLimiter limiter = new SimpleRateLimiter(permitsPerSecond); long start = System.nanoTime(); @@ -133,10 +147,11 @@ void testMultipleBurstCallsWhenPermitIsLess() } long end = System.nanoTime(); - long elapsedMs = (end - start) / 1_000_000; + long elapsedMs = (end - start) / NANOS_PER_MILLISECOND; // Expected ~3000ms, allow tolerance due to scheduler delays. - Assertions.assertThat(elapsedMs >= 2700 && elapsedMs <= 3500) + Assertions.assertThat( + elapsedMs >= minTimeAllowed && elapsedMs <= maxTimeAllowed) .describedAs("Expected ~3000ms, but got " + elapsedMs + "ms") .isTrue(); } From 1af17c1a8ce5b3c1eef8f0504ab883674acfcbd9 Mon Sep 17 00:00:00 2001 From: bhattmanish98 Date: Tue, 16 Dec 2025 21:52:40 -0800 Subject: [PATCH 04/10] Fixed checkstyle --- .../hadoop/fs/azurebfs/services/ITestAbfsClient.java | 10 ++++++---- .../fs/azurebfs/utils/TestSimpleRateLimiter.java | 3 ++- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java index 64d0e35fac869..920473d46ffd2 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java @@ -1062,7 +1062,8 @@ public void testGetMetricsCallMethod() throws Exception { */ @Test public void testMetricsEmitBasedOnCount() throws Exception { - runMetricsEmitTest(10L, true); + final long threshold = 10L; + runMetricsEmitTest(threshold, true); } /** @@ -1070,7 +1071,8 @@ public void testMetricsEmitBasedOnCount() throws Exception { */ @Test public void testMetricsEmitWithHighThreshold() throws Exception { - runMetricsEmitTest(100L, false); + final long threshold = 100L; + runMetricsEmitTest(threshold, false); } /** @@ -1079,11 +1081,11 @@ public void testMetricsEmitWithHighThreshold() throws Exception { */ private void runMetricsEmitTest(long threshold, boolean expectEmit) throws Exception { + final int totalWaitTime = 30; AzureBlobFileSystem fs = getFileSystem(); Configuration configuration = fs.getAbfsStore() .getAbfsConfiguration() .getRawConfiguration(); - int totalWaitTime = 30; configuration.setLong(FS_AZURE_METRIC_EMIT_THRESHOLD, threshold); configuration.setLong(FS_AZURE_METRICS_EMIT_THRESHOLD_INTERVAL_SECS, totalWaitTime); fs = (AzureBlobFileSystem) FileSystem.newInstance(configuration); @@ -1122,7 +1124,7 @@ private void runMetricsEmitTest(long threshold, boolean expectEmit) stream.hflush(); if (fs.getAbfsStore() .isAppendBlobKey(fs.makeQualified(testPath).toString())) { - totalMetrics += 1;// +1 request + totalMetrics += 1; // +1 request } else { totalMetrics += 2; // +2 requests } diff --git 
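The two burst tests above share one piece of arithmetic: the first acquire returns immediately and each subsequent one is spaced by 1/permitsPerSecond, so n back-to-back calls take roughly (n-1)/rate seconds. A sketch of the expectation the tolerance windows are built around (class name is mine):

final class BurstMath {
  private BurstMath() { }

  // First acquire is free; each later one adds 1000/permitsPerSecond ms.
  static long expectedBurstMillis(int calls, int permitsPerSecond) {
    return (long) (calls - 1) * 1_000 / permitsPerSecond;
  }

  public static void main(String[] args) {
    System.out.println(expectedBurstMillis(5, 10)); // 400, window 350-550 ms
    System.out.println(expectedBurstMillis(10, 3)); // 3000, window 2700-3500 ms
  }
}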
a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/TestSimpleRateLimiter.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/TestSimpleRateLimiter.java index 1d736d4a37535..74419cc377f28 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/TestSimpleRateLimiter.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/TestSimpleRateLimiter.java @@ -40,12 +40,13 @@ public class TestSimpleRateLimiter { */ @Test void testNoWaitWhenSpacedOut() throws InvalidConfigurationValueException { + final int millisToSleep = 600; // 600 ms // 2 permits per second → 500 ms interval SimpleRateLimiter limiter = new SimpleRateLimiter(2); limiter.acquire(); // Sleep longer than required interval - LockSupport.parkNanos(600 * NANOS_PER_MILLISECOND); // 600 ms + LockSupport.parkNanos(millisToSleep * NANOS_PER_MILLISECOND); long before = System.nanoTime(); limiter.acquire(); // Should not block From e6a60874bc31fc39d32e2ce128542050c4d4e704 Mon Sep 17 00:00:00 2001 From: bhattmanish98 Date: Wed, 17 Dec 2025 02:42:50 -0800 Subject: [PATCH 05/10] Fixed checkstyle + rename few variables --- .../hadoop/fs/azurebfs/AbfsConfiguration.java | 28 ++++++++++++------- .../hadoop/fs/azurebfs/AbfsCountersImpl.java | 27 ++++++++++-------- .../azurebfs/constants/ConfigurationKeys.java | 5 ++-- .../constants/FileSystemConfigurations.java | 5 ++-- .../azurebfs/services/AbfsBackoffMetrics.java | 9 ++++-- .../fs/azurebfs/services/AbfsClient.java | 12 ++++---- .../fs/azurebfs/services/AbfsCounters.java | 4 ++- .../services/AbfsReadFooterMetrics.java | 2 -- .../fs/azurebfs/services/ITestAbfsClient.java | 4 +-- .../services/TestAbfsBackoffMetrics.java | 2 +- .../services/TestAggregateMetricsManager.java | 9 +----- 11 files changed, 59 insertions(+), 48 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java index debd6a0af815a..4921284ba3def 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java @@ -340,22 +340,26 @@ public class AbfsConfiguration{ DefaultValue = DEFAULT_SHOULD_EMIT_METRICS_ON_IDLE_TIME) private boolean shouldEmitMetricsOnIdleTime; - @LongConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_METRIC_EMIT_THRESHOLD, - DefaultValue = DEFAULT_METRIC_EMIT_THRESHOLD) - private long metricEmitThreshold; + @LongConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_METRICS_EMIT_THRESHOLD, + DefaultValue = DEFAULT_METRICS_EMIT_THRESHOLD) + private long metricsEmitThreshold; @LongConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_METRICS_EMIT_THRESHOLD_INTERVAL_SECS, DefaultValue = DEFAULT_METRICS_EMIT_THRESHOLD_INTERVAL_SECS) private long metricsEmitThresholdIntervalInSecs; - @LongConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_METRIC_EMIT_INTERVAL_MINS, - DefaultValue = DEFAULT_METRIC_EMIT_INTERVAL_MINS) - private long metricEmitIntervalInMins; + @LongConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_METRICS_EMIT_INTERVAL_MINS, + DefaultValue = DEFAULT_METRICS_EMIT_INTERVAL_MINS) + private long metricsEmitIntervalInMins; @IntegerConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_MAX_METRICS_CALLS_PER_SECOND, DefaultValue = 
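The AbfsConfiguration fields above are filled declaratively: each field carries an annotation naming its configuration key and default value, and the loader populates annotated fields reflectively, so a key rename only touches the annotation and the getter. A generic, self-contained sketch of that binding style (the annotation and class names here are mine, not Hadoop's):

import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
import java.lang.reflect.Field;
import java.util.Map;

@Retention(RetentionPolicy.RUNTIME)
@Target(ElementType.FIELD)
@interface LongKey {
  String key();
  long defaultValue();
}

class DemoConfig {

  @LongKey(key = "fs.azure.metrics.emit.threshold", defaultValue = 100_000L)
  private long metricsEmitThreshold;

  // Reflectively bind every @LongKey field from the raw key/value map,
  // falling back to the annotated default when the key is absent.
  void load(Map<String, String> raw) throws IllegalAccessException {
    for (Field f : getClass().getDeclaredFields()) {
      LongKey ann = f.getAnnotation(LongKey.class);
      if (ann == null) {
        continue;
      }
      String v = raw.get(ann.key());
      f.setAccessible(true);
      f.setLong(this, v == null ? ann.defaultValue() : Long.parseLong(v));
    }
  }

  long getMetricsEmitThreshold() {
    return metricsEmitThreshold;
  }
}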
DEFAULT_MAX_METRICS_CALLS_PER_SECOND) private int maxMetricsCallsPerSecond; + @BooleanConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_BACKOFF_RETRY_METRICS_ENABLED, + DefaultValue = DEFAULT_BACKOFF_RETRY_METRICS_ENABLED) + private boolean backoffRetryMetricsEnabled; + @IntegerConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_ACCOUNT_OPERATION_IDLE_TIMEOUT, DefaultValue = DEFAULT_ACCOUNT_OPERATION_IDLE_TIMEOUT_MS) private int accountOperationIdleTimeout; @@ -1326,12 +1330,12 @@ public boolean shouldEmitMetricsOnIdleTime() { return shouldEmitMetricsOnIdleTime; } - public long getMetricEmitThreshold() { - return metricEmitThreshold; + public long getMetricsEmitThreshold() { + return metricsEmitThreshold; } - public long getMetricEmitIntervalInMins() { - return metricEmitIntervalInMins; + public long getMetricsEmitIntervalInMins() { + return metricsEmitIntervalInMins; } public long getMetricsEmitThresholdIntervalInSecs() { @@ -1342,6 +1346,10 @@ public int getMaxMetricsCallsPerSecond() { return maxMetricsCallsPerSecond; } + public boolean isBackoffRetryMetricsEnabled() { + return backoffRetryMetricsEnabled; + } + public int getAccountOperationIdleTimeout() { return accountOperationIdleTimeout; } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsCountersImpl.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsCountersImpl.java index 8509caaa2891c..aa2a4d3900f22 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsCountersImpl.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsCountersImpl.java @@ -201,19 +201,22 @@ public void initializeWriteResourceUtilizationMetrics() { @Override - public void initializeMetrics(MetricFormat metricFormat) { + public void initializeMetrics(final MetricFormat metricFormat, + final AbfsConfiguration abfsConfiguration) { switch (metricFormat) { - case INTERNAL_BACKOFF_METRIC_FORMAT: - abfsBackoffMetrics = new AbfsBackoffMetrics(); - break; - case INTERNAL_FOOTER_METRIC_FORMAT: - initializeReadFooterMetrics(); - case INTERNAL_METRIC_FORMAT: - abfsBackoffMetrics = new AbfsBackoffMetrics(); - initializeReadFooterMetrics(); - break; - default: - break; + case INTERNAL_BACKOFF_METRIC_FORMAT: + abfsBackoffMetrics = new AbfsBackoffMetrics( + abfsConfiguration.isBackoffRetryMetricsEnabled()); + break; + case INTERNAL_FOOTER_METRIC_FORMAT: + initializeReadFooterMetrics(); + case INTERNAL_METRIC_FORMAT: + abfsBackoffMetrics = new AbfsBackoffMetrics( + abfsConfiguration.isBackoffRetryMetricsEnabled()); + initializeReadFooterMetrics(); + break; + default: + break; } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java index e6e4b4d382096..cc97658137010 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java @@ -74,10 +74,11 @@ public final class ConfigurationKeys { public static final String FS_AZURE_METRIC_FORMAT = "fs.azure.metric.format"; public static final String FS_AZURE_METRICS_COLLECTION_ENABLED = "fs.azure.metrics.collection.enabled"; public static final String FS_AZURE_SHOULD_EMIT_METRICS_ON_IDLE_TIME = "fs.azure.should.emit.metrics.on.idle.time"; - public static final String 
FS_AZURE_METRIC_EMIT_THRESHOLD = "fs.azure.metric.emit.threshold"; + public static final String FS_AZURE_METRICS_EMIT_THRESHOLD = "fs.azure.metrics.emit.threshold"; public static final String FS_AZURE_METRICS_EMIT_THRESHOLD_INTERVAL_SECS = "fs.azure.metrics.emit.threshold.interval.secs"; - public static final String FS_AZURE_METRIC_EMIT_INTERVAL_MINS = "fs.azure.metric.emit.interval.mins"; + public static final String FS_AZURE_METRICS_EMIT_INTERVAL_MINS = "fs.azure.metrics.emit.interval.mins"; public static final String FS_AZURE_MAX_METRICS_CALLS_PER_SECOND = "fs.azure.max.metrics.calls.per.second"; + public static final String FS_AZURE_BACKOFF_RETRY_METRICS_ENABLED = "fs.azure.backoff.retry.metrics.enabled"; public static final String FS_AZURE_ACCOUNT_KEY_PROPERTY_NAME_REGX = "fs\\.azure\\.account\\.key\\.(.*)"; public static final String FS_AZURE_SECURE_MODE = "fs.azure.secure.mode"; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java index 787e2ca02cb3d..eccc9df990349 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java @@ -138,10 +138,11 @@ public final class FileSystemConfigurations { public static final int DEFAULT_METRIC_ANALYSIS_TIMEOUT_MS = 60_000; public static final boolean DEFAULT_METRICS_COLLECTION_ENABLED = true; public static final boolean DEFAULT_SHOULD_EMIT_METRICS_ON_IDLE_TIME = false; - public static final long DEFAULT_METRIC_EMIT_THRESHOLD = 100_000L; + public static final long DEFAULT_METRICS_EMIT_THRESHOLD = 100_000L; public static final long DEFAULT_METRICS_EMIT_THRESHOLD_INTERVAL_SECS = 60; - public static final long DEFAULT_METRIC_EMIT_INTERVAL_MINS = 60; + public static final long DEFAULT_METRICS_EMIT_INTERVAL_MINS = 60; public static final int DEFAULT_MAX_METRICS_CALLS_PER_SECOND = 3; + public static final boolean DEFAULT_BACKOFF_RETRY_METRICS_ENABLED = false; public static final boolean DEFAULT_FS_AZURE_ACCOUNT_LEVEL_THROTTLING_ENABLED = true; public static final int DEFAULT_ACCOUNT_OPERATION_IDLE_TIMEOUT_MS = 60_000; public static final int DEFAULT_ANALYSIS_PERIOD_MS = 10_000; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBackoffMetrics.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBackoffMetrics.java index b78ed31338bd7..d7651010338ce 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBackoffMetrics.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBackoffMetrics.java @@ -81,10 +81,13 @@ public class AbfsBackoffMetrics extends AbstractAbfsStatisticsSource { private static final List RETRY_LIST = Arrays.asList( RetryValue.values()); + private final boolean isRetryMetricEnabled; + /** * Constructor to initialize the IOStatisticsStore with counters and gauges. 
*/ - public AbfsBackoffMetrics() { + public AbfsBackoffMetrics(final boolean isRetryMetricEnabled) { + this.isRetryMetricEnabled = isRetryMetricEnabled; IOStatisticsStore ioStatisticsStore = iostatisticsStore() .withCounters(getMetricNames(TYPE_COUNTER)) .withGauges(getMetricNames(TYPE_GAUGE)) @@ -325,7 +328,9 @@ public String toString() { return EMPTY_STRING; } StringBuilder metricBuilder = new StringBuilder(); - getRetryMetrics(metricBuilder); + if (isRetryMetricEnabled) { + getRetryMetrics(metricBuilder); + } getBaseMetrics(metricBuilder); return metricBuilder.toString(); } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java index 63f7d4c44c0db..304b3e1d2f0dd 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java @@ -251,7 +251,7 @@ private AbfsClient(final URL baseUrl, this.renameResilience = abfsConfiguration.getRenameResilience(); this.fileSystemId = fileSystemId; this.aggregateMetricsManager = AggregateMetricsManager.get( - abfsConfiguration.getMetricEmitIntervalInMins(), + abfsConfiguration.getMetricsEmitIntervalInMins(), abfsConfiguration.getMaxMetricsCallsPerSecond()); this.abfsServiceType = abfsServiceType; @@ -312,7 +312,7 @@ private AbfsClient(final URL baseUrl, String metricAccountName = abfsConfiguration.getMetricAccount(); String metricAccountKey = abfsConfiguration.getMetricAccountKey(); this.metricFormat = abfsConfiguration.getMetricFormat(); - abfsCounters.initializeMetrics(metricFormat); + abfsCounters.initializeMetrics(metricFormat, getAbfsConfiguration()); if (isNotEmpty(metricAccountName) && isNotEmpty( metricAccountKey)) { int dotIndex = metricAccountName.indexOf(AbfsHttpConstants.DOT); @@ -347,7 +347,7 @@ private AbfsClient(final URL baseUrl, () -> { if (getAbfsCounters().getAbfsBackoffMetrics() .getMetricValue(TOTAL_NUMBER_OF_REQUESTS) - >= getAbfsConfiguration().getMetricEmitThreshold()) { + >= getAbfsConfiguration().getMetricsEmitThreshold()) { emitCollectedMetrics(); } }, @@ -358,8 +358,8 @@ private AbfsClient(final URL baseUrl, // run every metricInterval minutes this.metricsEmitScheduler.scheduleAtFixedRate( this::emitCollectedMetrics, - abfsConfiguration.getMetricEmitIntervalInMins(), - abfsConfiguration.getMetricEmitIntervalInMins(), + abfsConfiguration.getMetricsEmitIntervalInMins(), + abfsConfiguration.getMetricsEmitIntervalInMins(), TimeUnit.MINUTES); // emit metrics based on idle time @@ -1664,7 +1664,7 @@ private synchronized String getMetricsData() { if (StringUtils.isEmpty(metrics)) { return null; } - abfsCounters.initializeMetrics(metricFormat); + abfsCounters.initializeMetrics(metricFormat, getAbfsConfiguration()); return TracingContext.validateClientCorrelationID( abfsConfiguration.getClientCorrelationId()) + COLON + fileSystemId + COLON + metrics; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsCounters.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsCounters.java index 4512db98fcc69..8ada20abf4358 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsCounters.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsCounters.java @@ -25,6 +25,7 @@ import 
org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; import org.apache.hadoop.fs.azurebfs.AbfsStatistic; import org.apache.hadoop.fs.azurebfs.utils.MetricFormat; import org.apache.hadoop.fs.statistics.DurationTracker; @@ -77,7 +78,8 @@ String formString(String prefix, String separator, String suffix, @Override DurationTracker trackDuration(String key); - void initializeMetrics(MetricFormat metricFormat); + void initializeMetrics(MetricFormat metricFormat, + AbfsConfiguration abfsConfiguration); AbfsBackoffMetrics getAbfsBackoffMetrics(); diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsReadFooterMetrics.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsReadFooterMetrics.java index 5a3f088f09ce1..5f9db34867e00 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsReadFooterMetrics.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsReadFooterMetrics.java @@ -425,8 +425,6 @@ public void updateReadMetrics(final String filePathIdentifier, fileTypeMetrics.getReadCount() >= 1 && fileTypeMetrics.getCollectMetrics())) { updateMetrics(fileTypeMetrics, len, contentLength, nextReadPos); - } else { - // update metrics for non-footer reads } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java index 920473d46ffd2..ede48ce6b5979 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java @@ -92,7 +92,7 @@ import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRIC_ACCOUNT_KEY; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRIC_ACCOUNT_NAME; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRIC_FORMAT; -import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRIC_EMIT_THRESHOLD; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRICS_EMIT_THRESHOLD; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.JAVA_VENDOR; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.JAVA_VERSION; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.OS_ARCH; @@ -1086,7 +1086,7 @@ private void runMetricsEmitTest(long threshold, boolean expectEmit) Configuration configuration = fs.getAbfsStore() .getAbfsConfiguration() .getRawConfiguration(); - configuration.setLong(FS_AZURE_METRIC_EMIT_THRESHOLD, threshold); + configuration.setLong(FS_AZURE_METRICS_EMIT_THRESHOLD, threshold); configuration.setLong(FS_AZURE_METRICS_EMIT_THRESHOLD_INTERVAL_SECS, totalWaitTime); fs = (AzureBlobFileSystem) FileSystem.newInstance(configuration); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsBackoffMetrics.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsBackoffMetrics.java index b8ce3ba411c0c..956548a8df670 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsBackoffMetrics.java +++ 
b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsBackoffMetrics.java @@ -40,7 +40,7 @@ public class TestAbfsBackoffMetrics { */ @BeforeEach public void setUp() { - metrics = new AbfsBackoffMetrics(); + metrics = new AbfsBackoffMetrics(true); } /** diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAggregateMetricsManager.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAggregateMetricsManager.java index 16503f5da7384..16d3ca1db772c 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAggregateMetricsManager.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAggregateMetricsManager.java @@ -50,7 +50,7 @@ public class TestAggregateMetricsManager extends AbstractAbfsIntegrationTest { private static final long NANOS_PER_MILLISECOND = 1_000_000L; // The manager under test - private AggregateMetricsManager manager; + private final AggregateMetricsManager manager; /** * Constructor for TestAggregateMetricsManager. @@ -59,13 +59,6 @@ public class TestAggregateMetricsManager extends AbstractAbfsIntegrationTest { */ public TestAggregateMetricsManager() throws Exception { super(); - } - - /** - * Resets the AggregateMetricsManager instance before each test. - */ - @BeforeEach - void resetManager() throws Exception { manager = AggregateMetricsManager.get(10, 3); } From ba2961c5f0111a0122da322e7c9f1da52b7cb2ee Mon Sep 17 00:00:00 2001 From: bhattmanish98 Date: Fri, 19 Dec 2025 01:55:25 -0800 Subject: [PATCH 06/10] Checkstyle fix --- .../services/AggregateMetricsManager.java | 10 ++-- .../fs/azurebfs/utils/SimpleRateLimiter.java | 49 +++++++++---------- .../fs/azurebfs/services/ITestAbfsClient.java | 9 +++- .../services/TestAggregateMetricsManager.java | 13 ++--- .../azurebfs/utils/TestSimpleRateLimiter.java | 15 +++++- 5 files changed, 56 insertions(+), 40 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AggregateMetricsManager.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AggregateMetricsManager.java index a45531a00239b..9ba910f7e7c72 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AggregateMetricsManager.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AggregateMetricsManager.java @@ -33,18 +33,16 @@ */ public final class AggregateMetricsManager { - /** - * Singleton instance of AggregateMetricsManager. - */ + // Singleton instance of AggregateMetricsManager. private static volatile AggregateMetricsManager instance; + // Rate limiter to control the rate of dispatching metrics. + private static volatile SimpleRateLimiter rateLimiter; + // Map of account name to MetricsBucket. private final ConcurrentHashMap buckets = new ConcurrentHashMap<>(); - // Rate limiter to control the rate of dispatching metrics. - private final SimpleRateLimiter rateLimiter; - // Scheduler for periodic dispatching of metrics. 
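Patch 05 threads a constructor flag into AbfsBackoffMetrics so the verbose per-retry section is serialized only when the corresponding config is enabled; the TestAbfsBackoffMetrics change above makes that explicit by passing true. A minimal sketch of the gating shape (the class is mine and the $-delimited labels are illustrative, patterned on the test data seen earlier):

class MetricsSnapshot {

  private final boolean includeRetrySection;

  MetricsSnapshot(boolean includeRetrySection) {
    this.includeRetrySection = includeRetrySection;
  }

  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder();
    if (includeRetrySection) {
      sb.append("$RT=6.024%");          // retry-only counters, behind the flag
    }
    sb.append("$TRNR=2543$TR=2706");    // base counters, always emitted
    return sb.toString();
  }
}

Keeping the flag final and constructor-injected means the emitted format of one metrics instance never changes mid-flight.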
private final ScheduledExecutorService scheduler; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/SimpleRateLimiter.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/SimpleRateLimiter.java index eadd632b80dab..b46759d3f1552 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/SimpleRateLimiter.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/SimpleRateLimiter.java @@ -18,57 +18,56 @@ package org.apache.hadoop.fs.azurebfs.utils; -import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.locks.LockSupport; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidConfigurationValueException; -public class SimpleRateLimiter { +public final class SimpleRateLimiter { - /** The minimum interval between permits, in nanoseconds. */ + // Interval between permits in nanoseconds. private final long intervalNanos; - /** The next allowed time (in nanoseconds) when a permit may be issued. */ - private final AtomicLong nextAllowedTime = new AtomicLong(0); + // Next allowed time to acquire a permit in nanoseconds. + private long nextAllowedTime; /** Number of nanoseconds in one second. */ private static final long NANOS_PER_SECOND = 1_000_000_000L; /** - * Creates a rate limiter with a fixed number of permits allowed per second. + * Constructs a SimpleRateLimiter that allows the specified number of + * permits per second. * - * @param permitsPerSecond the maximum number of permits allowed per second; - * must be a positive integer - * @throws IllegalArgumentException if {@code permitsPerSecond <= 0} + * @param permitsPerSecond Number of permits allowed per second. + * @throws InvalidConfigurationValueException if permitsPerSecond is + * less than or equal to zero. */ public SimpleRateLimiter(int permitsPerSecond) throws InvalidConfigurationValueException { if (permitsPerSecond <= 0) { throw new InvalidConfigurationValueException( - "Aggregated Metrics Per Second Call"); + "permitsPerSecond must be > 0"); } this.intervalNanos = NANOS_PER_SECOND / permitsPerSecond; + this.nextAllowedTime = System.nanoTime(); } /** - * Acquires a permit from the rate limiter, blocking if necessary to maintain - * the configured rate. - * - * If the current time is earlier than the next allowed permit time, this - * method blocks for the required duration. Otherwise, it proceeds - * immediately. + * Acquires a permit from the rate limiter, blocking until one is available. 
*/ - public void acquire() { - while (true) { // In case of failure, it will retry + public synchronized void acquire() { + while (true) { long now = System.nanoTime(); - long prev = nextAllowedTime.get(); - long next = Math.max(prev, now) + intervalNanos; + long wait = nextAllowedTime - now; + + if (wait <= 0) { + nextAllowedTime = now + intervalNanos; + return; + } + + LockSupport.parkNanos(wait); - if (nextAllowedTime.compareAndSet(prev, next)) { - long wait = next - now - intervalNanos; // adjust for this permit - if (wait > 0) { - LockSupport.parkNanos(wait); - } + if (Thread.interrupted()) { + Thread.currentThread().interrupt(); return; } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java index ede48ce6b5979..4ac364b3df8ff 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java @@ -1103,6 +1103,13 @@ private void runMetricsEmitTest(long threshold, boolean expectEmit) .getIngressClient() instanceof AbfsDfsClient) { // create file + set properties requests totalMetrics += 1; + } else if (fs.getAbfsStore() + .getClient() instanceof AbfsDfsClient + && fs.getAbfsStore() + .getClientHandler() + .getIngressClient() instanceof AbfsBlobClient + && getIsNamespaceEnabled(fs)) { + totalMetrics += 2; } else { // create file + set properties + get properties requests totalMetrics += 4; @@ -1145,7 +1152,7 @@ private void runMetricsEmitTest(long threshold, boolean expectEmit) "Total requests should match counted requests when threshold is high") .isEqualTo(totalMetrics); // Wait for emit scheduler to run - Thread.sleep(totalWaitTime * 1000); // 20 seconds + Thread.sleep(totalWaitTime * 1000); // 30 seconds if (expectEmit) { Assertions.assertThat(currentTotal(fs)) diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAggregateMetricsManager.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAggregateMetricsManager.java index 16d3ca1db772c..f54c8ebe44fa1 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAggregateMetricsManager.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAggregateMetricsManager.java @@ -34,7 +34,6 @@ import java.util.concurrent.atomic.AtomicLong; import org.assertj.core.api.Assertions; -import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.mockito.Mockito; @@ -83,7 +82,7 @@ private AbfsClient spyClient(AzureBlobFileSystem azureBlobFileSystem, Mockito.doAnswer(inv -> { counter.incrementAndGet(); - return inv.callRealMethod(); + return null; }).when(client).getMetricCall(Mockito.any()); return client; @@ -265,7 +264,8 @@ public void testDeregisterNonexistentClient() throws IOException { @Test public void testMultipleMetricCallsInCaseDataIsMoreThanBufferSize() throws Exception { - final int metricsDataSize1 = 927; // size of aggregated data for first 3 calls + final int metricsDataSize1 + = 927; // size of aggregated data for first 3 calls final int metricsDataSize2 = 115; // size of aggregated data for last call final int numberOfMetrics = 25; // total metrics to send AtomicInteger calls = new AtomicInteger(0); @@ -288,7 +288,7 @@ public void 
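With the rewrite above, acquire() serializes callers on the instance monitor, computes the remaining wait against a single nextAllowedTime, parks for exactly that long, and re-asserts the interrupt flag instead of swallowing it. A tiny driver (assuming the patched SimpleRateLimiter is on the classpath) that makes the pacing observable:

public class RateLimiterDriver {

  public static void main(String[] args) throws Exception {
    // 2 permits per second -> ~500 ms between permits after the first.
    org.apache.hadoop.fs.azurebfs.utils.SimpleRateLimiter limiter =
        new org.apache.hadoop.fs.azurebfs.utils.SimpleRateLimiter(2);
    long last = System.nanoTime();
    for (int i = 0; i < 4; i++) {
      limiter.acquire();
      long now = System.nanoTime();
      System.out.printf("permit %d after %d ms%n", i,
          (now - last) / 1_000_000L);
      last = now;
    }
  }
}

Expected output is roughly 0, 500, 500, 500 ms: nextAllowedTime is seeded with the construction time, so the first acquire returns immediately.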
testMultipleMetricCallsInCaseDataIsMoreThanBufferSize() .isEqualTo(metricsDataSize2); } calls.incrementAndGet(); - return inv.callRealMethod(); + return null; }).when(client).getMetricCall(Mockito.any()); manager.registerClient("acc1", client); for (int i = 0; i < numberOfMetrics; i++) { @@ -312,7 +312,8 @@ public void testMultipleMetricCallsInCaseDataIsMoreThanBufferSize() * the global rate limiter enforces spacing between dispatches. */ @Test - public void testRateLimitMetricCalls() throws IOException, InterruptedException { + public void testRateLimitMetricCalls() + throws IOException, InterruptedException { final int permitsPerSecond = 3; final long minIntervalMs = 1_000 / permitsPerSecond; // 333ms final double toleranceMs = 15; // allow 15ms jitter @@ -700,7 +701,7 @@ private AbfsClient createSpiedClient(AtomicLong time) throws IOException { Mockito.doAnswer(inv -> { time.set(System.nanoTime()); - return inv.callRealMethod(); + return null; }).when(client).getMetricCall(Mockito.any()); return client; diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/TestSimpleRateLimiter.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/TestSimpleRateLimiter.java index 74419cc377f28..cee857e39282e 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/TestSimpleRateLimiter.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/TestSimpleRateLimiter.java @@ -18,6 +18,7 @@ package org.apache.hadoop.fs.azurebfs.utils; +import java.util.concurrent.CountDownLatch; import java.util.concurrent.locks.LockSupport; import org.assertj.core.api.Assertions; @@ -99,7 +100,8 @@ void testRateLimitingDelay() throws InvalidConfigurationValueException { * require spacing). 
*/ @Test - void testMultipleBurstCalls() throws InvalidConfigurationValueException { + void testMultipleBurstCalls() + throws InvalidConfigurationValueException, InterruptedException { final int permitsPerSecond = 10; final long minTimeAllowed = 350; final long maxTimeAllowed = 550; @@ -108,9 +110,18 @@ void testMultipleBurstCalls() throws InvalidConfigurationValueException { long totalStart = System.nanoTime(); + CountDownLatch latch = new CountDownLatch(5); for (int i = 0; i < 5; i++) { - limiter.acquire(); + new Thread(() -> { + try { + limiter.acquire(); + } finally { + latch.countDown(); + } + }).start(); } + //wait for all threads to finish + latch.await(); long totalMs = (System.nanoTime() - totalStart) / NANOS_PER_MILLISECOND; From c89af4368e382e7b9e0fd5f8372c22c6e155b25d Mon Sep 17 00:00:00 2001 From: bhattmanish98 Date: Wed, 7 Jan 2026 03:03:58 -0800 Subject: [PATCH 07/10] Addressed Comments --- .../hadoop/fs/azurebfs/AbfsConfiguration.java | 18 +- .../hadoop/fs/azurebfs/AbfsCountersImpl.java | 18 +- .../fs/azurebfs/AzureBlobFileSystemStore.java | 9 +- .../azurebfs/constants/AbfsHttpConstants.java | 3 + .../azurebfs/constants/ConfigurationKeys.java | 56 ++- .../constants/FileSystemConfigurations.java | 6 +- .../fs/azurebfs/services/AbfsBlobClient.java | 5 +- .../fs/azurebfs/services/AbfsClient.java | 272 +----------- .../azurebfs/services/AbfsClientContext.java | 8 +- .../services/AbfsClientContextBuilder.java | 8 +- .../azurebfs/services/AbfsClientHandler.java | 17 +- .../fs/azurebfs/services/AbfsDfsClient.java | 5 +- .../azurebfs/services/AbfsMetricsManager.java | 392 ++++++++++++++++++ .../services/AbfsReadFooterMetrics.java | 27 +- .../azurebfs/services/AbfsRestOperation.java | 11 +- .../services/AggregateMetricsManager.java | 25 +- .../fs/azurebfs/services/MetricsBucket.java | 25 +- .../fs/azurebfs/utils/SimpleRateLimiter.java | 26 +- .../fs/azurebfs/utils/TracingContext.java | 2 +- .../azurebfs/utils/TracingHeaderFormat.java | 3 +- .../azurebfs/utils/TracingHeaderVersion.java | 11 +- .../azurebfs/ITestAbfsReadFooterMetrics.java | 4 +- .../fs/azurebfs/services/ITestAbfsClient.java | 83 ++-- .../ITestApacheClientConnectionPool.java | 4 +- .../fs/azurebfs/services/TestAbfsClient.java | 26 +- .../services/TestAbfsRestOperation.java | 4 +- .../services/TestAggregateMetricsManager.java | 119 +++--- .../azurebfs/utils/TestSimpleRateLimiter.java | 13 +- 28 files changed, 750 insertions(+), 450 deletions(-) create mode 100644 hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsMetricsManager.java diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java index 4921284ba3def..38673fb5c278e 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java @@ -324,11 +324,11 @@ public class AbfsConfiguration{ DefaultValue = DEFAULT_METRIC_ANALYSIS_TIMEOUT_MS) private int metricAnalysisTimeout; - @StringConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_METRIC_ACCOUNT_NAME, + @StringConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_METRICS_ACCOUNT_NAME, DefaultValue = EMPTY_STRING) private String metricAccount; - @StringConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_METRIC_ACCOUNT_KEY, + @StringConfigurationValidatorAnnotation(ConfigurationKey = 
FS_AZURE_METRICS_ACCOUNT_KEY, DefaultValue = EMPTY_STRING) private String metricAccountKey; @@ -336,8 +336,8 @@ public class AbfsConfiguration{ DefaultValue = DEFAULT_METRICS_COLLECTION_ENABLED) private boolean metricsCollectionEnabled; - @BooleanConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_SHOULD_EMIT_METRICS_ON_IDLE_TIME, - DefaultValue = DEFAULT_SHOULD_EMIT_METRICS_ON_IDLE_TIME) + @BooleanConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_METRICS_SHOULD_EMIT_ON_IDLE_TIME, + DefaultValue = DEFAULT_METRICS_SHOULD_EMIT_ON_IDLE_TIME) private boolean shouldEmitMetricsOnIdleTime; @LongConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_METRICS_EMIT_THRESHOLD, @@ -352,12 +352,12 @@ public class AbfsConfiguration{ DefaultValue = DEFAULT_METRICS_EMIT_INTERVAL_MINS) private long metricsEmitIntervalInMins; - @IntegerConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_MAX_METRICS_CALLS_PER_SECOND, - DefaultValue = DEFAULT_MAX_METRICS_CALLS_PER_SECOND) + @IntegerConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_METRICS_MAX_CALLS_PER_SECOND, + DefaultValue = DEFAULT_METRICS_MAX_CALLS_PER_SECOND) private int maxMetricsCallsPerSecond; - @BooleanConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_BACKOFF_RETRY_METRICS_ENABLED, - DefaultValue = DEFAULT_BACKOFF_RETRY_METRICS_ENABLED) + @BooleanConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_METRICS_BACKOFF_RETRY_ENABLED, + DefaultValue = DEFAULT_METRICS_BACKOFF_RETRY_ENABLED) private boolean backoffRetryMetricsEnabled; @IntegerConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_ACCOUNT_OPERATION_IDLE_TIMEOUT, @@ -1438,7 +1438,7 @@ public TracingHeaderFormat getTracingHeaderFormat() { } public MetricFormat getMetricFormat() { - return getEnum(FS_AZURE_METRIC_FORMAT, MetricFormat.INTERNAL_METRIC_FORMAT); + return getEnum(FS_AZURE_METRICS_FORMAT, MetricFormat.INTERNAL_METRIC_FORMAT); } public AuthType getAuthType(String accountName) { diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsCountersImpl.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsCountersImpl.java index aa2a4d3900f22..363ed34025a7f 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsCountersImpl.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsCountersImpl.java @@ -73,6 +73,7 @@ import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.SEND_REQUESTS; import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.SERVER_UNAVAILABLE; import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.WRITE_THROTTLES; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EMPTY_STRING; import static org.apache.hadoop.fs.azurebfs.enums.AbfsBackoffMetricsEnum.TOTAL_NUMBER_OF_REQUESTS; import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.iostatisticsStore; import static org.apache.hadoop.util.Time.now; @@ -210,6 +211,7 @@ public void initializeMetrics(final MetricFormat metricFormat, break; case INTERNAL_FOOTER_METRIC_FORMAT: initializeReadFooterMetrics(); + break; case INTERNAL_METRIC_FORMAT: abfsBackoffMetrics = new AbfsBackoffMetrics( abfsConfiguration.isBackoffRetryMetricsEnabled()); @@ -226,15 +228,11 @@ public void initializeMetrics(final MetricFormat metricFormat, * create a new instance with the existing map. 
*/ private void initializeReadFooterMetrics() { - if (abfsReadFooterMetrics == null) { - abfsReadFooterMetrics = new AbfsReadFooterMetrics(); - } else { - //In case metrics is emitted based on total count, there could be a chance - // that file type for which we have calculated the type will be lost. - // To avoid that, creating a new instance with existing map. - abfsReadFooterMetrics = new AbfsReadFooterMetrics( - abfsReadFooterMetrics.getFileTypeMetricsMap()); - } + abfsReadFooterMetrics = new AbfsReadFooterMetrics( + abfsReadFooterMetrics == null + ? null + : abfsReadFooterMetrics.getFileTypeMetricsMap() + ); } /** @@ -392,7 +390,7 @@ public DurationTracker trackDuration(String key) { @Override public String toString() { - String metric = ""; + String metric = EMPTY_STRING; if (abfsBackoffMetrics != null) { if (getAbfsBackoffMetrics().getMetricValue(TOTAL_NUMBER_OF_REQUESTS) > 0) { metric += "#BO:" + getAbfsBackoffMetrics().toString(); diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java index 296b8c0ed130a..43446aaf5a183 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java @@ -193,6 +193,7 @@ public class AzureBlobFileSystemStore implements Closeable, ListingSupport { private final IdentityTransformerInterface identityTransformer; private final AbfsPerfTracker abfsPerfTracker; private final AbfsCounters abfsCounters; + private final String fileSystemId; /** * The set of directories where we should store files as append blobs. @@ -258,7 +259,8 @@ public AzureBlobFileSystemStore( boolean useHttps = (usingOauth || abfsConfiguration.isHttpsAlwaysUsed()) ? 
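The initializeReadFooterMetrics refactor above collapses the old null and non-null branches into a single constructor call while keeping the important behavior: the accumulated per-file-type map survives re-initialization, so file classification is not lost when metrics are emitted and reset on a count threshold. A generic sketch of that state-preserving reset (class and field names are mine):

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

class FooterMetricsSketch {

  private final Map<String, Long> fileTypeMetricsMap;

  FooterMetricsSketch(Map<String, Long> existing) {
    // Fresh counters on every reset, but the classification map carries over.
    this.fileTypeMetricsMap =
        existing == null ? new ConcurrentHashMap<>() : existing;
  }

  Map<String, Long> getFileTypeMetricsMap() {
    return fileTypeMetricsMap;
  }

  static FooterMetricsSketch reset(FooterMetricsSketch current) {
    return new FooterMetricsSketch(
        current == null ? null : current.getFileTypeMetricsMap());
  }
}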
true : abfsStoreBuilder.isSecureScheme; this.abfsPerfTracker = new AbfsPerfTracker(fileSystemName, accountName, this.abfsConfiguration); this.abfsCounters = abfsStoreBuilder.abfsCounters; - initializeClient(uri, fileSystemName, accountName, useHttps, abfsStoreBuilder.fileSystemId); + this.fileSystemId = abfsStoreBuilder.fileSystemId; + initializeClient(uri, fileSystemName, accountName, useHttps); final Class identityTransformerClass = abfsStoreBuilder.configuration.getClass(FS_AZURE_IDENTITY_TRANSFORM_CLASS, IdentityTransformer.class, IdentityTransformerInterface.class); @@ -1717,7 +1719,7 @@ public boolean isInfiniteLeaseKey(String key) { * @throws IOException */ private void initializeClient(URI uri, String fileSystemName, - String accountName, boolean isSecure, String fileSystemId) + String accountName, boolean isSecure) throws IOException { if (this.getClient() != null) { return; @@ -1795,7 +1797,7 @@ private void initializeClient(URI uri, String fileSystemName, this.clientHandler = new AbfsClientHandler(baseUrl, creds, abfsConfiguration, tokenProvider, sasTokenProvider, encryptionContextProvider, - populateAbfsClientContext(), fileSystemId); + populateAbfsClientContext()); this.setClient(getClientHandler().getClient()); LOG.trace("AbfsClient init complete"); @@ -1826,6 +1828,7 @@ private AbfsClientContext populateAbfsClientContext() { new TailLatencyRequestTimeoutRetryPolicy(abfsConfiguration)) .withAbfsCounters(abfsCounters) .withAbfsPerfTracker(abfsPerfTracker) + .withFileSystemId(fileSystemId) .build(); } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java index 2057cd3e9fb8f..918997ab43b01 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java @@ -137,6 +137,9 @@ public final class AbfsHttpConstants { public static final String HASH = "#"; public static final String TRUE = "true"; public static final String ZERO = "0"; + public static final String UNDERSCORE = "_"; + public static final String OPENING_SQUARE_BRACKET = "["; + public static final String CLOSING_SQUARE_BRACKET = "]"; public static final String PLUS_ENCODE = "%20"; public static final String FORWARD_SLASH_ENCODE = "%2F"; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java index cc97658137010..c5eb9235fbb54 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java @@ -18,6 +18,7 @@ package org.apache.hadoop.fs.azurebfs.constants; +import org.apache.hadoop.fs.azurebfs.utils.MetricFormat; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.FileSystem; @@ -69,16 +70,59 @@ public final class ConfigurationKeys { */ public static final String FS_AZURE_ACCOUNT_IS_EXPECT_HEADER_ENABLED = "fs.azure.account.expect.header.enabled"; public static final String FS_AZURE_ACCOUNT_KEY_PROPERTY_NAME = "fs.azure.account.key"; - public static final String FS_AZURE_METRIC_ACCOUNT_NAME 
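The store changes above stop threading fileSystemId through an extra constructor parameter; instead it rides on AbfsClientContext via the builder, which is why the AbfsBlobClient and AbfsDfsClient signatures shrink later in this patch. A condensed sketch of that builder shape (class names shortened; only withFileSystemId and getFileSystemId are taken from the diff, the rest is scaffolding):

final class ClientContext {

  private final String fileSystemId;

  private ClientContext(Builder b) {
    this.fileSystemId = b.fileSystemId;
  }

  String getFileSystemId() {
    return fileSystemId;
  }

  static final class Builder {
    private String fileSystemId;

    Builder withFileSystemId(String id) {
      this.fileSystemId = id;
      return this;
    }

    ClientContext build() {
      return new ClientContext(this);
    }
  }
}

// usage, mirroring populateAbfsClientContext():
//   ClientContext ctx = new ClientContext.Builder()
//       .withFileSystemId(fileSystemId)
//       .build();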
= "fs.azure.metric.account.name"; - public static final String FS_AZURE_METRIC_ACCOUNT_KEY = "fs.azure.metric.account.key"; - public static final String FS_AZURE_METRIC_FORMAT = "fs.azure.metric.format"; + + /** + * Config to set separate metrics account in case user don't want to use + * existing storage account for metrics collection. + * Value: {@value}. + */ + public static final String FS_AZURE_METRICS_ACCOUNT_NAME = "fs.azure.metrics.account.name"; + /** + * Config to set metrics account key for @FS_AZURE_METRICS_ACCOUNT_NAME. + * Value: {@value}. + */ + public static final String FS_AZURE_METRICS_ACCOUNT_KEY = "fs.azure.metrics.account.key"; + /** + * Config to set metrics format. Possible values are {@link MetricFormat} + * Value: {@value}. + */ + public static final String FS_AZURE_METRICS_FORMAT = "fs.azure.metrics.format"; + /** + * Config to enable or disable metrics collection. + * Value: {@value}. + */ public static final String FS_AZURE_METRICS_COLLECTION_ENABLED = "fs.azure.metrics.collection.enabled"; - public static final String FS_AZURE_SHOULD_EMIT_METRICS_ON_IDLE_TIME = "fs.azure.should.emit.metrics.on.idle.time"; + /** + * Config to enable or disable emitting metrics when idle time exceeds threshold. + * Value: {@value}. + */ + public static final String FS_AZURE_METRICS_SHOULD_EMIT_ON_IDLE_TIME = "fs.azure.metrics.should.emit.on.idle.time"; + /** + * Config to set threshold for emitting metrics when number of operations exceeds threshold. + * Value: {@value}. + */ public static final String FS_AZURE_METRICS_EMIT_THRESHOLD = "fs.azure.metrics.emit.threshold"; + /** + * Config to set interval in seconds to check for threshold breach for emitting metrics. + * If the number of operations exceed threshold within this interval, metrics will be emitted. + * Value: {@value}. + */ public static final String FS_AZURE_METRICS_EMIT_THRESHOLD_INTERVAL_SECS = "fs.azure.metrics.emit.threshold.interval.secs"; + /** + * Config to set interval in minutes for emitting metrics in regular time intervals. + * Value: {@value}. + */ public static final String FS_AZURE_METRICS_EMIT_INTERVAL_MINS = "fs.azure.metrics.emit.interval.mins"; - public static final String FS_AZURE_MAX_METRICS_CALLS_PER_SECOND = "fs.azure.max.metrics.calls.per.second"; - public static final String FS_AZURE_BACKOFF_RETRY_METRICS_ENABLED = "fs.azure.backoff.retry.metrics.enabled"; + /** + * Config to set maximum metrics calls per second. + * Value: {@value}. + */ + public static final String FS_AZURE_METRICS_MAX_CALLS_PER_SECOND = "fs.azure.metrics.max.calls.per.second"; + /** + * Config to enable or disable backoff retry metrics collection. + * Value: {@value}. 
+ */ + public static final String FS_AZURE_METRICS_BACKOFF_RETRY_ENABLED = "fs.azure.metrics.backoff.retry.enabled"; public static final String FS_AZURE_ACCOUNT_KEY_PROPERTY_NAME_REGX = "fs\\.azure\\.account\\.key\\.(.*)"; public static final String FS_AZURE_SECURE_MODE = "fs.azure.secure.mode"; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java index eccc9df990349..6f76f2e033c06 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java @@ -137,12 +137,12 @@ public final class FileSystemConfigurations { public static final int DEFAULT_METRIC_IDLE_TIMEOUT_MS = 60_000; public static final int DEFAULT_METRIC_ANALYSIS_TIMEOUT_MS = 60_000; public static final boolean DEFAULT_METRICS_COLLECTION_ENABLED = true; - public static final boolean DEFAULT_SHOULD_EMIT_METRICS_ON_IDLE_TIME = false; + public static final boolean DEFAULT_METRICS_SHOULD_EMIT_ON_IDLE_TIME = false; public static final long DEFAULT_METRICS_EMIT_THRESHOLD = 100_000L; public static final long DEFAULT_METRICS_EMIT_THRESHOLD_INTERVAL_SECS = 60; public static final long DEFAULT_METRICS_EMIT_INTERVAL_MINS = 60; - public static final int DEFAULT_MAX_METRICS_CALLS_PER_SECOND = 3; - public static final boolean DEFAULT_BACKOFF_RETRY_METRICS_ENABLED = false; + public static final int DEFAULT_METRICS_MAX_CALLS_PER_SECOND = 3; + public static final boolean DEFAULT_METRICS_BACKOFF_RETRY_ENABLED = false; public static final boolean DEFAULT_FS_AZURE_ACCOUNT_LEVEL_THROTTLING_ENABLED = true; public static final int DEFAULT_ACCOUNT_OPERATION_IDLE_TIMEOUT_MS = 60_000; public static final int DEFAULT_ANALYSIS_PERIOD_MS = 10_000; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java index 7173534cf1494..52fbd3182fdd5 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobClient.java @@ -188,10 +188,9 @@ public AbfsBlobClient(final URL baseUrl, final AccessTokenProvider tokenProvider, final SASTokenProvider sasTokenProvider, final EncryptionContextProvider encryptionContextProvider, - final AbfsClientContext abfsClientContext, - final String fileSystemId) throws IOException { + final AbfsClientContext abfsClientContext) throws IOException { super(baseUrl, sharedKeyCredentials, abfsConfiguration, tokenProvider, sasTokenProvider, - encryptionContextProvider, abfsClientContext, fileSystemId, AbfsServiceType.BLOB); + encryptionContextProvider, abfsClientContext, AbfsServiceType.BLOB); this.azureAtomicRenameDirSet = new HashSet<>(Arrays.asList( abfsConfiguration.getAzureAtomicRenameDirs() .split(AbfsHttpConstants.COMMA))); diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java index 304b3e1d2f0dd..00898e21f00ae 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java +++ 
b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java @@ -36,14 +36,9 @@ import java.util.Hashtable; import java.util.List; import java.util.Locale; -import java.util.Timer; -import java.util.TimerTask; import java.util.concurrent.Callable; -import java.util.concurrent.Executors; -import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.ThreadFactory; import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicBoolean; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -86,10 +81,8 @@ import org.apache.hadoop.fs.azurebfs.security.ContextEncryptionAdapter; import org.apache.hadoop.fs.azurebfs.utils.DateTimeUtils; import org.apache.hadoop.fs.azurebfs.utils.EncryptionType; -import org.apache.hadoop.fs.azurebfs.utils.MetricFormat; import org.apache.hadoop.fs.azurebfs.utils.TracingContext; import org.apache.hadoop.fs.azurebfs.utils.TracingHeaderFormat; -import org.apache.hadoop.fs.azurebfs.utils.UriUtils; import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.fs.store.LogExactlyOnce; @@ -112,7 +105,6 @@ import static org.apache.hadoop.fs.azurebfs.AzureBlobFileSystemStore.extractEtagHeader; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.APN_VERSION; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.CLIENT_VERSION; -import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.COLON; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.DEFAULT_TIMEOUT; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EMPTY_STRING; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.FILESYSTEM; @@ -151,7 +143,6 @@ import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_VERSION; import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_RESOURCE; import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_TIMEOUT; -import static org.apache.hadoop.fs.azurebfs.enums.AbfsBackoffMetricsEnum.TOTAL_NUMBER_OF_REQUESTS; import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.CONNECTION_TIMEOUT_ABBREVIATION; import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.TAIL_LATENCY_REQUEST_TIMEOUT_ABBREVIATION; @@ -161,7 +152,6 @@ public abstract class AbfsClient implements Closeable { public static final Logger LOG = LoggerFactory.getLogger(AbfsClient.class); public static final String HUNDRED_CONTINUE_USER_AGENT = SINGLE_WHITE_SPACE + HUNDRED_CONTINUE + SEMICOLON; - public static final String ABFS_CLIENT_TIMER_THREAD_NAME = "abfs-timer-client"; public static final String FNS_BLOB_USER_AGENT_IDENTIFIER = "FNS"; private final URL baseUrl; @@ -185,14 +175,6 @@ public abstract class AbfsClient implements Closeable { private AccessTokenProvider tokenProvider; private SASTokenProvider sasTokenProvider; private final AbfsCounters abfsCounters; - private Timer timer; - private URL metricUrl; - private final AtomicBoolean isMetricCollectionEnabled - = new AtomicBoolean(false); - private MetricFormat metricFormat; - private final AtomicBoolean isMetricCollectionStopped; - private final int metricAnalysisPeriod; - private final int metricIdlePeriod; private EncryptionContextProvider encryptionContextProvider = null; private EncryptionType encryptionType = EncryptionType.NONE; private final AbfsThrottlingIntercept intercept; @@ -201,26 +183,15 @@ 
public abstract class AbfsClient implements Closeable { private final ListeningScheduledExecutorService executorService; private boolean renameResilience; - private TimerTask runningTimerTask; - private SharedKeyCredentials metricSharedkeyCredentials = null; - - /** - * True if metric account name and key are different from storage account. - * False otherwise. - */ - private boolean hasSeparateMetricAccount = false; private KeepAliveCache keepAliveCache; private AbfsApacheHttpClient abfsApacheHttpClient; - // Scheduler to emit aggregated metric based on time - private ScheduledExecutorService metricsEmitScheduler = null; + private final AbfsMetricsManager abfsMetricsManager; private final String fileSystemId; - private final AggregateMetricsManager aggregateMetricsManager; - private AbfsServiceType abfsServiceType; /** @@ -233,7 +204,6 @@ private AbfsClient(final URL baseUrl, final AbfsConfiguration abfsConfiguration, final EncryptionContextProvider encryptionContextProvider, final AbfsClientContext abfsClientContext, - final String fileSystemId, final AbfsServiceType abfsServiceType) throws IOException { this.baseUrl = baseUrl; this.sharedKeyCredentials = sharedKeyCredentials; @@ -249,10 +219,7 @@ private AbfsClient(final URL baseUrl, this.intercept = AbfsThrottlingInterceptFactory.getInstance(accountName, abfsConfiguration); this.tailLatencyTracker = AbfsTailLatencyTrackerFactory.getInstance(accountName, abfsConfiguration); this.renameResilience = abfsConfiguration.getRenameResilience(); - this.fileSystemId = fileSystemId; - this.aggregateMetricsManager = AggregateMetricsManager.get( - abfsConfiguration.getMetricsEmitIntervalInMins(), - abfsConfiguration.getMaxMetricsCallsPerSecond()); + this.fileSystemId = abfsClientContext.getFileSystemId(); this.abfsServiceType = abfsServiceType; if (encryptionContextProvider != null) { @@ -303,79 +270,12 @@ private AbfsClient(final URL baseUrl, new ThreadFactoryBuilder().setNameFormat("AbfsClient Lease Ops").setDaemon(true).build(); this.executorService = MoreExecutors.listeningDecorator( HadoopExecutors.newScheduledThreadPool(this.abfsConfiguration.getNumLeaseThreads(), tf)); - this.isMetricCollectionEnabled.set(abfsConfiguration.isMetricsCollectionEnabled()); - this.isMetricCollectionStopped = new AtomicBoolean(false); - this.metricAnalysisPeriod = abfsConfiguration.getMetricAnalysisTimeout(); - this.metricIdlePeriod = abfsConfiguration.getMetricIdleTimeout(); - if (isMetricCollectionEnabled()) { - try { - String metricAccountName = abfsConfiguration.getMetricAccount(); - String metricAccountKey = abfsConfiguration.getMetricAccountKey(); - this.metricFormat = abfsConfiguration.getMetricFormat(); - abfsCounters.initializeMetrics(metricFormat, getAbfsConfiguration()); - if (isNotEmpty(metricAccountName) && isNotEmpty( - metricAccountKey)) { - int dotIndex = metricAccountName.indexOf(AbfsHttpConstants.DOT); - if (dotIndex <= 0) { - throw new InvalidUriException( - metricAccountName + " - account name is not fully qualified."); - } - try { - metricSharedkeyCredentials = new SharedKeyCredentials( - metricAccountName.substring(0, dotIndex), - metricAccountKey); - hasSeparateMetricAccount = true; - setMetricsUrl(metricAccountName.startsWith(HTTPS_SCHEME) - ? 
metricAccountName : HTTPS_SCHEME + COLON - + FORWARD_SLASH + FORWARD_SLASH + metricAccountName); - } catch (IllegalArgumentException e) { - throw new IOException( - "Exception while initializing metric credentials ", e); - } - } else { - setMetricsUrl(baseUrlString.substring(0, indexLastForwardSlash + 1)); - } - // register the client to Aggregated Metrics Manager - this.aggregateMetricsManager.registerClient(accountName, this); - - // Metrics emitter scheduler - this.metricsEmitScheduler - = Executors.newSingleThreadScheduledExecutor(); - // run every 1 minute to check the metrics count - this.metricsEmitScheduler.scheduleAtFixedRate( - () -> { - if (getAbfsCounters().getAbfsBackoffMetrics() - .getMetricValue(TOTAL_NUMBER_OF_REQUESTS) - >= getAbfsConfiguration().getMetricsEmitThreshold()) { - emitCollectedMetrics(); - } - }, - abfsConfiguration.getMetricsEmitThresholdIntervalInSecs(), - abfsConfiguration.getMetricsEmitThresholdIntervalInSecs(), - TimeUnit.SECONDS); - - // run every metricInterval minutes - this.metricsEmitScheduler.scheduleAtFixedRate( - this::emitCollectedMetrics, - abfsConfiguration.getMetricsEmitIntervalInMins(), - abfsConfiguration.getMetricsEmitIntervalInMins(), - TimeUnit.MINUTES); - - // emit metrics based on idea time - if (abfsConfiguration.shouldEmitMetricsOnIdleTime()) { - this.timer = new Timer( - ABFS_CLIENT_TIMER_THREAD_NAME, true); - timer.schedule(new TimerTaskImpl(), - metricIdlePeriod, - metricIdlePeriod); - } - } catch (Exception e) { - LOG.error("Metrics disabled. Failed to initialize metrics for {}", - baseUrl, e); - this.isMetricCollectionEnabled.set(false); - } - } + this.abfsMetricsManager = new AbfsMetricsManager(abfsConfiguration, abfsCounters, + baseUrlString, indexLastForwardSlash, accountName, fileSystemId); + // register the client to Aggregated Metrics Manager + getAbfsMetricsManager().getAggregateMetricsManager() + .registerClient(accountName, this); // Initialize write thread pool metrics if dynamic write thread pool scaling is enabled. 
if (abfsConfiguration.isDynamicWriteThreadPoolEnablement()) { @@ -436,27 +336,19 @@ public AbfsClient(final URL baseUrl, final SASTokenProvider sasTokenProvider, final EncryptionContextProvider encryptionContextProvider, final AbfsClientContext abfsClientContext, - final String fileSystemId, final AbfsServiceType abfsServiceType) + final AbfsServiceType abfsServiceType) throws IOException { this(baseUrl, sharedKeyCredentials, abfsConfiguration, - encryptionContextProvider, abfsClientContext, fileSystemId, abfsServiceType); + encryptionContextProvider, abfsClientContext, abfsServiceType); this.sasTokenProvider = sasTokenProvider; this.tokenProvider = tokenProvider; } @Override public void close() throws IOException { - if (isMetricCollectionEnabled()) { - if (runningTimerTask != null) { - runningTimerTask.cancel(); - timer.cancel(); - } - if (metricsEmitScheduler != null && !metricsEmitScheduler.isShutdown()) { - metricsEmitScheduler.shutdownNow(); - } - emitCollectedMetrics(); - this.aggregateMetricsManager.deregisterClient(accountName, this); - } + getAbfsMetricsManager().closeMetricsResources(); + getAbfsMetricsManager().getAggregateMetricsManager() + .deregisterClient(accountName, this); if (keepAliveCache != null) { keepAliveCache.close(); } @@ -509,10 +401,6 @@ SharedKeyCredentials getSharedKeyCredentials() { return sharedKeyCredentials; } - SharedKeyCredentials getMetricSharedkeyCredentials() { - return metricSharedkeyCredentials; - } - public void setEncryptionType(EncryptionType encryptionType) { this.encryptionType = encryptionType; } @@ -534,10 +422,11 @@ AbfsThrottlingIntercept getIntercept() { } /** - * @return true if metric account name and key are different from storage account. + * @return AbfsMetricsManager instance. */ - public boolean hasSeparateMetricAccount() { - return hasSeparateMetricAccount; + @VisibleForTesting + public AbfsMetricsManager getAbfsMetricsManager() { + return abfsMetricsManager; } /** @@ -1649,71 +1538,6 @@ protected AccessTokenProvider getTokenProvider() { return tokenProvider; } - /** - * Retrieves a TracingContext object configured for metric tracking. - * This method creates a TracingContext object with the validated client correlation ID, - * the host name of the local machine (or "UnknownHost" if unable to determine), - * the file system operation type set to GET_ATTR, and additional configuration parameters - * for metric tracking. - * The TracingContext is intended for use in tracking metrics related to Azure Blob FileSystem (ABFS) operations. - * - * @return A TracingContext object configured for metric tracking. - */ - private synchronized String getMetricsData() { - String metrics = abfsCounters.toString(); - if (StringUtils.isEmpty(metrics)) { - return null; - } - abfsCounters.initializeMetrics(metricFormat, getAbfsConfiguration()); - return TracingContext.validateClientCorrelationID( - abfsConfiguration.getClientCorrelationId()) + COLON + fileSystemId - + COLON + metrics; - } - - /** - * Synchronized method to suspend or resume timer. - * @param timerFunctionality resume or suspend. - * @param timerTask The timertask object. - * @return true or false.
- */ - boolean timerOrchestrator(TimerFunctionality timerFunctionality, TimerTask timerTask) { - switch (timerFunctionality) { - case RESUME: - if (isMetricCollectionEnabled() && isMetricCollectionStopped.get()) { - synchronized (this) { - if (isMetricCollectionStopped.get()) { - resumeTimer(); - } - } - } - break; - case SUSPEND: - long now = System.currentTimeMillis(); - long lastExecutionTime = abfsCounters.getLastExecutionTime().get(); - if (isMetricCollectionEnabled() && (now - lastExecutionTime >= metricAnalysisPeriod)) { - synchronized (this) { - if (!isMetricCollectionStopped.get()) { - timerTask.cancel(); - timer.purge(); - isMetricCollectionStopped.set(true); - return true; - } - } - } - break; - default: - break; - } - return false; - } - - private void resumeTimer() { - isMetricCollectionStopped.set(false); - timer.schedule(new TimerTaskImpl(), - metricIdlePeriod, - metricIdlePeriod); - } - /** * Initiates a metric call to the Azure Blob FileSystem (ABFS) for retrieving file system properties. * This method performs a HEAD request to the specified metric URL, using default headers and query parameters. @@ -1732,7 +1556,7 @@ public void getMetricCall(String metricsData) throws IOException { // Construct the URL for the metric call // In case of blob storage, the URL is changed to DFS URL - final URL url = createRequestUrl(getMetricsUrl(), + final URL url = createRequestUrl(getAbfsMetricsManager().getMetricsUrl(), EMPTY_STRING, abfsUriQueryBuilder.toString()); final AbfsRestOperation op = getAbfsRestOperation( AbfsRestOperationType.GetFileSystemProperties, @@ -1752,58 +1576,6 @@ public void getMetricCall(String metricsData) throws IOException { } } - public boolean isMetricCollectionEnabled() { - return isMetricCollectionEnabled.get(); - } - - /** - * Getter for metric URL. - * - * @return metricUrl - */ - @VisibleForTesting - public URL getMetricsUrl() { - return metricUrl; - } - - /** - * Setter for metric URL. - * Converts blob URL to dfs URL in case of blob storage account. - * - * @param urlString to be set as metricUrl. - * @throws IOException if URL is malformed. - */ - private void setMetricsUrl(String urlString) throws IOException { - metricUrl = UriUtils.changeUrlFromBlobToDfs(new URL(urlString)); - } - - class TimerTaskImpl extends TimerTask { - TimerTaskImpl() { - runningTimerTask = this; - } - @Override - public void run() { - if (timerOrchestrator(TimerFunctionality.SUSPEND, this)) { - emitCollectedMetrics(); - } - } - } - - /** - * Emits the collected metrics by making a metric call to the Azure Blob FileSystem (ABFS). - * This method checks if metric collection is enabled and, if so, attempts to perform - * a metric call using the configured tracing context. Any IOException encountered during - * the metric call is logged and ignored to prevent termination of the timer task. - * Finally, it re-initializes the metrics in the AbfsCounters instance using the specified - * metric format. - */ - public void emitCollectedMetrics() { - if (!isMetricCollectionEnabled()) { - return; - } - this.aggregateMetricsManager.recordMetric(accountName, getMetricsData()); - } - /** * Creates an AbfsRestOperation with additional parameters for buffer and SAS token. 
* @@ -1895,16 +1667,6 @@ KeepAliveCache getKeepAliveCache() { return keepAliveCache; } - @VisibleForTesting - protected Timer getTimer() { - return timer; - } - - @VisibleForTesting - ScheduledExecutorService getMetricsEmitScheduler() { - return metricsEmitScheduler; - } - protected String getUserAgent() { return userAgent; } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientContext.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientContext.java index 0744d987a4dda..27b2d5996e02e 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientContext.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientContext.java @@ -29,18 +29,20 @@ public class AbfsClientContext { private final TailLatencyRequestTimeoutRetryPolicy tailLatencyRequestTimeoutRetryPolicy; private final AbfsPerfTracker abfsPerfTracker; private final AbfsCounters abfsCounters; + private final String fileSystemId; AbfsClientContext( ExponentialRetryPolicy exponentialRetryPolicy, StaticRetryPolicy staticRetryPolicy, TailLatencyRequestTimeoutRetryPolicy tailLatencyRequestTimeoutRetryPolicy, AbfsPerfTracker abfsPerfTracker, - AbfsCounters abfsCounters) { + AbfsCounters abfsCounters, String fileSystemId) { this.exponentialRetryPolicy = exponentialRetryPolicy; this.staticRetryPolicy = staticRetryPolicy; this.tailLatencyRequestTimeoutRetryPolicy = tailLatencyRequestTimeoutRetryPolicy; this.abfsPerfTracker = abfsPerfTracker; this.abfsCounters = abfsCounters; + this.fileSystemId = fileSystemId; } public ExponentialRetryPolicy getExponentialRetryPolicy() { @@ -66,4 +68,8 @@ public AbfsPerfTracker getAbfsPerfTracker() { AbfsCounters getAbfsCounters() { return abfsCounters; } + + public String getFileSystemId() { + return fileSystemId; + } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientContextBuilder.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientContextBuilder.java index 3e02a49985884..5a175d2726301 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientContextBuilder.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientContextBuilder.java @@ -29,6 +29,7 @@ public class AbfsClientContextBuilder { private TailLatencyRequestTimeoutRetryPolicy tailLatencyRequestTimeoutRetryPolicy; private AbfsPerfTracker abfsPerfTracker; private AbfsCounters abfsCounters; + private String fileSystemId; public AbfsClientContextBuilder withExponentialRetryPolicy( final ExponentialRetryPolicy exponentialRetryPolicy) { @@ -59,6 +60,11 @@ public AbfsClientContextBuilder withAbfsCounters(final AbfsCounters abfsCounters return this; } + public AbfsClientContextBuilder withFileSystemId(final String fileSystemId) { + this.fileSystemId = fileSystemId; + return this; + } + /** * Build the context and get the instance with the properties selected. 
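+ * <p>A minimal usage sketch (illustrative; the variables are placeholders + * for objects created elsewhere): + * <pre> + * AbfsClientContext context = new AbfsClientContextBuilder() + * .withAbfsCounters(abfsCounters) + * .withFileSystemId(fileSystemId) + * .build(); + * </pre>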
* @@ -71,6 +77,6 @@ public AbfsClientContext build() { staticRetryPolicy, tailLatencyRequestTimeoutRetryPolicy, abfsPerfTracker, - abfsCounters); + abfsCounters, fileSystemId); } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientHandler.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientHandler.java index ea0a879305f99..393811c256bdd 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientHandler.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientHandler.java @@ -69,18 +69,17 @@ public AbfsClientHandler(final URL baseUrl, final AccessTokenProvider tokenProvider, final SASTokenProvider sasTokenProvider, final EncryptionContextProvider encryptionContextProvider, - final AbfsClientContext abfsClientContext, - final String fileSystemId) throws IOException { + final AbfsClientContext abfsClientContext) throws IOException { // This will initialize the default and ingress service types. // This is needed before creating the clients so that we can do cache warmup // only for default client. initServiceType(abfsConfiguration); this.dfsAbfsClient = createDfsClient(baseUrl, sharedKeyCredentials, abfsConfiguration, tokenProvider, sasTokenProvider, encryptionContextProvider, - abfsClientContext, fileSystemId); + abfsClientContext); this.blobAbfsClient = createBlobClient(baseUrl, sharedKeyCredentials, abfsConfiguration, tokenProvider, sasTokenProvider, encryptionContextProvider, - abfsClientContext, fileSystemId); + abfsClientContext); } /** @@ -155,8 +154,7 @@ private AbfsDfsClient createDfsClient(final URL baseUrl, final AccessTokenProvider tokenProvider, final SASTokenProvider sasTokenProvider, final EncryptionContextProvider encryptionContextProvider, - final AbfsClientContext abfsClientContext, - final String fileSystemId) throws IOException { + final AbfsClientContext abfsClientContext) throws IOException { URL dfsUrl = changeUrlFromBlobToDfs(baseUrl); LOG.debug( "Creating AbfsDfsClient with access token provider: %s and " @@ -164,7 +162,7 @@ private AbfsDfsClient createDfsClient(final URL baseUrl, tokenProvider, sasTokenProvider, dfsUrl); return new AbfsDfsClient(dfsUrl, creds, abfsConfiguration, tokenProvider, sasTokenProvider, encryptionContextProvider, - abfsClientContext, fileSystemId); + abfsClientContext); } /** @@ -186,8 +184,7 @@ private AbfsBlobClient createBlobClient(final URL baseUrl, final AccessTokenProvider tokenProvider, final SASTokenProvider sasTokenProvider, final EncryptionContextProvider encryptionContextProvider, - final AbfsClientContext abfsClientContext, - final String fileSystemId) throws IOException { + final AbfsClientContext abfsClientContext) throws IOException { URL blobUrl = changeUrlFromDfsToBlob(baseUrl); LOG.debug( "Creating AbfsBlobClient with access token provider: %s and " @@ -195,7 +192,7 @@ private AbfsBlobClient createBlobClient(final URL baseUrl, tokenProvider, sasTokenProvider, blobUrl); return new AbfsBlobClient(blobUrl, creds, abfsConfiguration, tokenProvider, sasTokenProvider, encryptionContextProvider, - abfsClientContext, fileSystemId); + abfsClientContext); } @Override diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java index 9d14028142964..5ddb9770ac56e 100644 --- 
a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java @@ -172,10 +172,9 @@ public AbfsDfsClient(final URL baseUrl, final AccessTokenProvider tokenProvider, final SASTokenProvider sasTokenProvider, final EncryptionContextProvider encryptionContextProvider, - final AbfsClientContext abfsClientContext, - final String fileSystemId) throws IOException { + final AbfsClientContext abfsClientContext) throws IOException { super(baseUrl, sharedKeyCredentials, abfsConfiguration, tokenProvider, sasTokenProvider, - encryptionContextProvider, abfsClientContext, fileSystemId, AbfsServiceType.DFS); + encryptionContextProvider, abfsClientContext, AbfsServiceType.DFS); } /** diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsMetricsManager.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsMetricsManager.java new file mode 100644 index 0000000000000..1751e0fdb931e --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsMetricsManager.java @@ -0,0 +1,392 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.io.IOException; +import java.net.URL; +import java.util.Timer; +import java.util.TimerTask; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; + +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.classification.VisibleForTesting; +import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; +import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidUriException; +import org.apache.hadoop.fs.azurebfs.utils.MetricFormat; +import org.apache.hadoop.fs.azurebfs.utils.TracingContext; +import org.apache.hadoop.fs.azurebfs.utils.UriUtils; + +import static org.apache.commons.lang3.StringUtils.isNotEmpty; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.COLON; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.FORWARD_SLASH; +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemUriSchemes.HTTPS_SCHEME; +import static org.apache.hadoop.fs.azurebfs.enums.AbfsBackoffMetricsEnum.TOTAL_NUMBER_OF_REQUESTS; +import static org.apache.hadoop.fs.azurebfs.services.AbfsClient.LOG; + +/** + * AbfsMetricsManager is responsible for managing metrics collection + * and emission for an AbfsClient instance. + */ +public class AbfsMetricsManager { + + // Timer thread name for AbfsMetricsManager + public static final String ABFS_CLIENT_TIMER_THREAD_NAME + = "abfs-timer-client"; + + // Timer for scheduling metric emission tasks based on idle time + private Timer timer; + + // URL for sending metrics + private URL metricUrl; + + // Shared key credentials for metric account + private SharedKeyCredentials metricSharedkeyCredentials = null; + + // Currently running TimerTask + private TimerTask runningTimerTask; + + // Metric analysis period + private final int metricAnalysisPeriod; + + // Metric idle period + private final int metricIdlePeriod; + + // Flag to indicate if a separate metric account is used + private boolean hasSeparateMetricAccount = false; + + // Flag to indicate if metric collection is enabled + private final AtomicBoolean isMetricCollectionEnabled + = new AtomicBoolean(false); + + // Format in which metrics are emitted + private MetricFormat metricFormat; + + // Flag to indicate if metric collection is stopped + private final AtomicBoolean isMetricCollectionStopped; + + // AggregateMetricsManager instance + private final AggregateMetricsManager aggregateMetricsManager; + + // Scheduler to emit aggregated metric based on time + private ScheduledExecutorService metricsEmitScheduler = null; + + // AbfsConfiguration instance + final AbfsConfiguration abfsConfiguration; + + // AbfsCounters instance + final AbfsCounters abfsCounters; + + // File system ID + final String fileSystemId; + + // Storage account name + final String accountName; + + /** + * Constructor for AbfsMetricsManager. + * + * @param abfsConfiguration AbfsConfiguration object. + * @param abfsCounters AbfsCounters object. + * @param baseUrlString Base URL string of the AbfsClient.
+ * @param indexLastForwardSlash Index of last forward slash in the base URL string. + * @param accountName Storage account name. + * @param fileSystemId File system ID. + */ + public AbfsMetricsManager(final AbfsConfiguration abfsConfiguration, + final AbfsCounters abfsCounters, final String baseUrlString, + final int indexLastForwardSlash, final String accountName, + final String fileSystemId) { + this.abfsConfiguration = abfsConfiguration; + this.abfsCounters = abfsCounters; + this.fileSystemId = fileSystemId; + this.isMetricCollectionEnabled.set( + abfsConfiguration.isMetricsCollectionEnabled()); + this.isMetricCollectionStopped = new AtomicBoolean(false); + this.aggregateMetricsManager = AggregateMetricsManager.getInstance( + abfsConfiguration.getMetricsEmitIntervalInMins(), + abfsConfiguration.getMaxMetricsCallsPerSecond()); + this.metricAnalysisPeriod = abfsConfiguration.getMetricAnalysisTimeout(); + this.metricIdlePeriod = abfsConfiguration.getMetricIdleTimeout(); + this.accountName = accountName; + if (isMetricCollectionEnabled()) { + try { + String metricAccountName = abfsConfiguration.getMetricAccount(); + String metricAccountKey = abfsConfiguration.getMetricAccountKey(); + this.metricFormat = abfsConfiguration.getMetricFormat(); + if (isNotEmpty(metricAccountName) && isNotEmpty( + metricAccountKey)) { + int dotIndex = metricAccountName.indexOf(AbfsHttpConstants.DOT); + if (dotIndex <= 0) { + throw new InvalidUriException( + metricAccountName + " - account name is not fully qualified."); + } + try { + metricSharedkeyCredentials = new SharedKeyCredentials( + metricAccountName.substring(0, dotIndex), + metricAccountKey); + hasSeparateMetricAccount = true; + setMetricsUrl(metricAccountName.startsWith(HTTPS_SCHEME) + ? metricAccountName : HTTPS_SCHEME + COLON + + FORWARD_SLASH + FORWARD_SLASH + metricAccountName); + } catch (IllegalArgumentException e) { + throw new IOException( + "Exception while initializing metric credentials ", e); + } + } else { + setMetricsUrl(baseUrlString.substring(0, indexLastForwardSlash + 1)); + } + // Once the metric URL is set, initialize the metrics + abfsCounters.initializeMetrics(metricFormat, abfsConfiguration); + // Metrics emitter scheduler + this.metricsEmitScheduler + = Executors.newSingleThreadScheduledExecutor(); + // check at every threshold interval whether the request count has crossed the emit threshold + this.metricsEmitScheduler.scheduleWithFixedDelay( + () -> { + if (abfsCounters.getAbfsBackoffMetrics() + .getMetricValue(TOTAL_NUMBER_OF_REQUESTS) + >= abfsConfiguration.getMetricsEmitThreshold()) { + emitCollectedMetrics(); + } + }, + abfsConfiguration.getMetricsEmitThresholdIntervalInSecs(), + abfsConfiguration.getMetricsEmitThresholdIntervalInSecs(), + TimeUnit.SECONDS); + + // emit on the regular schedule, every metricsEmitIntervalInMins minutes + this.metricsEmitScheduler.scheduleWithFixedDelay( + this::emitCollectedMetrics, + abfsConfiguration.getMetricsEmitIntervalInMins(), + abfsConfiguration.getMetricsEmitIntervalInMins(), + TimeUnit.MINUTES); + + // emit metrics after the configured idle period, when enabled + if (abfsConfiguration.shouldEmitMetricsOnIdleTime()) { + this.timer = new Timer( + ABFS_CLIENT_TIMER_THREAD_NAME, true); + timer.schedule(new TimerTaskImpl(), + metricIdlePeriod, + metricIdlePeriod); + } + } catch (Exception e) { + LOG.error("Metrics disabled. Failed to initialize metrics for {}", + baseUrlString, e); + this.isMetricCollectionEnabled.set(false); + } + } + } +
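+ /* + * Emission triggers wired up above (a summary of this constructor, not + * additional behaviour): (1) every metricsEmitThresholdIntervalInSecs, + * emit if TOTAL_NUMBER_OF_REQUESTS has crossed metricsEmitThreshold; + * (2) every metricsEmitIntervalInMins, emit unconditionally; and + * (3) when shouldEmitMetricsOnIdleTime is set, emit once the client has + * been idle for metricIdlePeriod (see TimerTaskImpl below). + */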
+ /** + * Closes the metrics resources. + * This method cancels any running timer tasks, shuts down the metrics emission scheduler, + * and emits any collected metrics before closing. + */ + public void closeMetricsResources() { + if (isMetricCollectionEnabled()) { + if (runningTimerTask != null) { + runningTimerTask.cancel(); + timer.cancel(); + } + if (metricsEmitScheduler != null && !metricsEmitScheduler.isShutdown()) { + metricsEmitScheduler.shutdownNow(); + } + emitCollectedMetrics(); + } + } + + /** + * Builds the metrics payload to emit: the validated client correlation ID, + * the file system ID and the serialized counters, joined by colons. + * The counters are re-initialized after being read, so each call returns + * only the metrics collected since the previous call. + * + * @return the metrics payload, or null if no metrics have been collected. + */ + private synchronized String getMetricsData() { + String metrics = abfsCounters.toString(); + if (StringUtils.isEmpty(metrics)) { + return null; + } + abfsCounters.initializeMetrics(metricFormat, abfsConfiguration); + return TracingContext.validateClientCorrelationID( + abfsConfiguration.getClientCorrelationId()) + COLON + fileSystemId + + COLON + metrics; + } + + /** + * Synchronized method to suspend or resume the timer. + * @param timerFunctionality resume or suspend. + * @param timerTask The timer task object. + * @return true if the timer was suspended, false otherwise. + */ + boolean timerOrchestrator(TimerFunctionality timerFunctionality, + TimerTask timerTask) { + switch (timerFunctionality) { + case RESUME: + if (isMetricCollectionEnabled() && isMetricCollectionStopped.get()) { + synchronized (this) { + if (isMetricCollectionStopped.get()) { + resumeTimer(); + } + } + } + break; + case SUSPEND: + long now = System.currentTimeMillis(); + long lastExecutionTime = abfsCounters.getLastExecutionTime().get(); + if (isMetricCollectionEnabled() && (now - lastExecutionTime + >= metricAnalysisPeriod)) { + synchronized (this) { + if (!isMetricCollectionStopped.get()) { + timerTask.cancel(); + timer.purge(); + isMetricCollectionStopped.set(true); + return true; + } + } + } + break; + default: + break; + } + return false; + } + + /** + * Resumes the timer for metric collection. + * This method sets the isMetricCollectionStopped flag to false + * and schedules a new TimerTaskImpl to run at fixed intervals + * defined by the metricIdlePeriod. + */ + private void resumeTimer() { + isMetricCollectionStopped.set(false); + timer.schedule(new TimerTaskImpl(), + metricIdlePeriod, + metricIdlePeriod); + } + + /** + * Checks if metric collection is enabled. + * + * @return true if metric collection is enabled, false otherwise. + */ + public boolean isMetricCollectionEnabled() { + return isMetricCollectionEnabled.get() && fileSystemId != null; + } + + /** + * Getter for metric URL. + * + * @return metricUrl + */ + @VisibleForTesting + public URL getMetricsUrl() { + return metricUrl; + } + + /** + * Setter for metric URL. + * Converts blob URL to dfs URL in case of blob storage account. + * + * @param urlString to be set as metricUrl. + * @throws IOException if URL is malformed. + */ + private void setMetricsUrl(String urlString) throws IOException { + metricUrl = UriUtils.changeUrlFromBlobToDfs(new URL(urlString)); + } +
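+ /* + * Idle-time flow sketch (assuming the defaults DEFAULT_METRIC_IDLE_TIMEOUT_MS + * and DEFAULT_METRIC_ANALYSIS_TIMEOUT_MS, both 60 seconds): every request + * resumes the timer through timerOrchestrator(RESUME, null) from + * AbfsRestOperation; once no request has executed for a full analysis + * period, the next TimerTaskImpl run suspends the timer and flushes the + * collected metrics. + */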
+ /** + * TimerTask implementation for emitting collected metrics based on idle time. + * This class extends TimerTask and overrides the run method to + * check if the timer should be suspended based on the configured + * metric analysis period. If the timer is suspended, it triggers + * the emission of collected metrics. + */ + class TimerTaskImpl extends TimerTask { + + TimerTaskImpl() { + runningTimerTask = this; + } + + @Override + public void run() { + if (timerOrchestrator(TimerFunctionality.SUSPEND, this)) { + emitCollectedMetrics(); + } + } + } + + /** + * Emits the collected metrics by handing them to the AggregateMetricsManager. + * This method checks if metric collection is enabled and, if so, records the + * current metrics payload against the storage account; the manager batches + * the recorded payloads and performs the actual metric calls, respecting the + * configured rate limits. + */ + public void emitCollectedMetrics() { + if (!isMetricCollectionEnabled()) { + return; + } + this.aggregateMetricsManager.recordMetric(accountName, getMetricsData()); + } + + /** + * Getter for timer. + */ + @VisibleForTesting + protected Timer getTimer() { + return timer; + } + + /** + * Getter for metricsEmitScheduler. + */ + @VisibleForTesting + ScheduledExecutorService getMetricsEmitScheduler() { + return metricsEmitScheduler; + } + + /** + * @return true if metric account name and key are different from storage account. + */ + public boolean hasSeparateMetricAccount() { + return hasSeparateMetricAccount; + } + + /** + * Getter for metric shared key credentials. + */ + public SharedKeyCredentials getMetricSharedkeyCredentials() { + return metricSharedkeyCredentials; + } + + /** + * Getter for AggregateMetricsManager. + */ + public AggregateMetricsManager getAggregateMetricsManager() { + return aggregateMetricsManager; + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsReadFooterMetrics.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsReadFooterMetrics.java index 5f9db34867e00..84898954e5f4b 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsReadFooterMetrics.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsReadFooterMetrics.java @@ -37,6 +37,7 @@ import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.CHAR_UNDERSCORE; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.COLON; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EMPTY_STRING; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.UNDERSCORE; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.ONE_KB; import static org.apache.hadoop.fs.azurebfs.constants.MetricsConstants.CHAR_DOLLAR; import static org.apache.hadoop.fs.azurebfs.constants.MetricsConstants.DOUBLE_PRECISION_FORMAT; @@ -67,6 +68,7 @@ public class AbfsReadFooterMetrics extends AbstractAbfsStatisticsSource { private static final Logger LOG = LoggerFactory.getLogger( AbfsReadFooterMetrics.class); + // Footer length in KB to identify Parquet files. private static final String FOOTER_LENGTH = "20"; private static final List FILE_TYPE_LIST = @@ -109,6 +111,8 @@ private FileTypeMetrics() { /** * Updates the file type based on the metrics collected.
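+ * The check below compares the two halves of each recorded + * {@code first_second} pair (for example {@code 20480_20480}, whose halves + * are equal).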
+ * In case the first two reads have equal size and offset differences, + * the file is classified as PARQUET; otherwise, it is classified as NON_PARQUET. */ private void updateFileType() { if (fileType == null) { @@ -127,7 +131,7 @@ && haveEqualValues(offsetDiffBetweenFirstAndSecondRead) * @return true if the value has equal parts, false otherwise */ private boolean haveEqualValues(String value) { - String[] parts = value.split("_"); + String[] parts = value.split(UNDERSCORE); return parts.length == 2 && parts[0].equals(parts[1]); } @@ -284,12 +288,10 @@ public AbfsReadFooterMetrics() { * @param fileTypeMetricsMap the map to track file type metrics */ public AbfsReadFooterMetrics(Map fileTypeMetricsMap) { - IOStatisticsStore ioStatisticsStore = iostatisticsStore() - .withCounters(getMetricNames(TYPE_COUNTER)) - .withMeanStatistics(getMetricNames(TYPE_MEAN)) - .build(); - setIOStatistics(ioStatisticsStore); - this.fileTypeMetricsMap = fileTypeMetricsMap; + this(); + this.fileTypeMetricsMap = fileTypeMetricsMap == null + ? new ConcurrentHashMap<>() + : fileTypeMetricsMap; } /** @@ -473,7 +475,7 @@ private void handleFirstRead(FileTypeMetrics fileTypeMetrics, fileTypeMetrics.setCollectMetricsForNextRead(true); fileTypeMetrics.setOffsetOfFirstRead(nextReadPos); fileTypeMetrics.setSizeReadByFirstRead( - len + "_" + Math.abs(contentLength - nextReadPos)); + len + UNDERSCORE + Math.abs(contentLength - nextReadPos)); } } @@ -495,7 +497,7 @@ private void handleSecondRead(FileTypeMetrics fileTypeMetrics, long offsetDiff = Math.abs( nextReadPos - fileTypeMetrics.getOffsetOfFirstRead()); fileTypeMetrics.setOffsetDiffBetweenFirstAndSecondRead( - len + "_" + offsetDiff); + len + UNDERSCORE + offsetDiff); fileTypeMetrics.setCollectLenMetrics(true); fileTypeMetrics.updateFileType(); updateMetricsData(fileTypeMetrics, len, contentLength); @@ -530,11 +532,12 @@ private void updateMetricsData(FileTypeMetrics fileTypeMetrics, int len, long contentLength) { long sizeReadByFirstRead = Long.parseLong( - fileTypeMetrics.getSizeReadByFirstRead().split("_")[0]); + fileTypeMetrics.getSizeReadByFirstRead().split(UNDERSCORE)[0]); long firstOffsetDiff = Long.parseLong( - fileTypeMetrics.getSizeReadByFirstRead().split("_")[1]); + fileTypeMetrics.getSizeReadByFirstRead().split(UNDERSCORE)[1]); long secondOffsetDiff = Long.parseLong( - fileTypeMetrics.getOffsetDiffBetweenFirstAndSecondRead().split("_")[1]); + fileTypeMetrics.getOffsetDiffBetweenFirstAndSecondRead().split( + UNDERSCORE)[1]); FileType fileType = fileTypeMetrics.getFileType(); addMeanMetricValue(fileType, AVG_READ_LEN_REQUESTED, len); diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java index 35d5fa078e776..1b55084fb4571 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java @@ -295,7 +295,10 @@ public void execute(TracingContext tracingContext) if (abfsCounters != null) { abfsCounters.getLastExecutionTime().set(now()); } - client.timerOrchestrator(TimerFunctionality.RESUME, null); + if (client.getAbfsMetricsManager() != null) { + client.getAbfsMetricsManager() + .timerOrchestrator(TimerFunctionality.RESUME, null); + } IOStatisticsBinding.trackDurationOfInvocation(abfsCounters, AbfsStatistic.getStatNameFromHttpCall(method), () -> 
completeExecute(lastUsedTracingContext)); @@ -583,8 +586,10 @@ private void registerApacheHttpClientIoException() { @VisibleForTesting public void signRequest(final AbfsHttpOperation httpOperation, int bytesToSign, boolean isMetricCall) throws IOException { - if (isMetricCall && client.hasSeparateMetricAccount()) { - client.getMetricSharedkeyCredentials().signRequest(httpOperation, bytesToSign); + if (isMetricCall && client.getAbfsMetricsManager() != null + && client.getAbfsMetricsManager().hasSeparateMetricAccount()) { + client.getAbfsMetricsManager().getMetricSharedkeyCredentials() + .signRequest(httpOperation, bytesToSign); } else { switch (client.getAuthType()) { case Custom: diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AggregateMetricsManager.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AggregateMetricsManager.java index 9ba910f7e7c72..e8bf3c0e8138e 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AggregateMetricsManager.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AggregateMetricsManager.java @@ -22,6 +22,7 @@ import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidConfigurationValueException; @@ -60,7 +61,7 @@ private AggregateMetricsManager(final long dispatchIntervalInMins, "permitsPerSecond must be > 0"); } - this.rateLimiter = new SimpleRateLimiter(permitsPerSecond); + rateLimiter = new SimpleRateLimiter(permitsPerSecond); // Initialize scheduler for periodic dispatching of metrics. this.scheduler = Executors.newSingleThreadScheduledExecutor(r -> { @@ -70,7 +71,7 @@ private AggregateMetricsManager(final long dispatchIntervalInMins, }); // Schedule periodic dispatching of metrics. - this.scheduler.scheduleAtFixedRate( + this.scheduler.scheduleWithFixedDelay( this::dispatchMetrics, dispatchIntervalInMins, dispatchIntervalInMins, @@ -90,7 +91,7 @@ private AggregateMetricsManager(final long dispatchIntervalInMins, * @param permitsPerSecond Rate limit for dispatching metrics. * @return Singleton instance of AggregateMetricsManager. */ - public static AggregateMetricsManager get(final long dispatchIntervalInMins, + public static AggregateMetricsManager getInstance(final long dispatchIntervalInMins, final int permitsPerSecond) { if (instance != null) { return instance; @@ -136,17 +137,17 @@ public boolean deregisterClient(String account, AbfsClient abfsClient) { return false; } - MetricsBucket bucket = buckets.get(account); - if (bucket == null) { - return false; - } + AtomicBoolean isRemoved = new AtomicBoolean(false); - boolean isRemoved = bucket.deregisterClient(abfsClient); + buckets.computeIfPresent(account, (key, bucket) -> { + // Deregister the client + isRemoved.set(bucket.deregisterClient(abfsClient)); - if (bucket.isEmpty()) { - buckets.remove(account, bucket); - } - return isRemoved; + // If bucket became empty, remove it atomically + return bucket.isEmpty() ? 
null : bucket; + }); + + return isRemoved.get(); } /** diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/MetricsBucket.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/MetricsBucket.java index c3b555161264b..47f72c581cc5e 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/MetricsBucket.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/MetricsBucket.java @@ -25,16 +25,27 @@ import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicReference; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import org.apache.hadoop.fs.azurebfs.utils.SimpleRateLimiter; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.CLOSING_SQUARE_BRACKET; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.COLON; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.OPENING_SQUARE_BRACKET; + /** * MetricsBucket holds metrics for multiple AbfsClients and * dispatches them in batches, respecting rate limits. */ final class MetricsBucket { + // Logger for the class. + private static final Logger LOG = LoggerFactory.getLogger(MetricsBucket.class); + // Rate limiter to control the rate of dispatching metrics. private final SimpleRateLimiter rateLimiter; @@ -134,16 +145,18 @@ private void sendMetrics(AbfsClient client, // Send outside synchronized block if (client != null && batchToSend != null && !batchToSend.isEmpty()) { for (String chunk : splitListBySize(batchToSend, MAX_HEADER_SIZE)) { - rateLimiter.acquire(); // Rate limiting + rateLimiter.acquire(5, TimeUnit.SECONDS); // Rate limiting; give up after 5 seconds rather than block dispatch indefinitely try { client.getMetricCall(chunk); - } catch (IOException ignored) {} + } catch (IOException e) { + LOG.debug("Failed to send metrics: {}", e.getMessage()); + } } } } // Check if there are no registered clients - public boolean isEmpty() { + public synchronized boolean isEmpty() { return clients.isEmpty(); } @@ -161,8 +174,8 @@ private static List<String> splitListBySize( List<String> result = new ArrayList<>(); StringBuilder sb = new StringBuilder(); - for (String s : new ArrayList<>(items)) { - String wrapped = "[" + s + "]"; + for (String s : items) { + String wrapped = OPENING_SQUARE_BRACKET + s + CLOSING_SQUARE_BRACKET; int additional = sb.length() == 0 ? wrapped.length() : wrapped.length() + 1; @@ -182,7 +195,7 @@ } if (sb.length() > 0) { - sb.append(':'); + sb.append(COLON); } sb.append(wrapped); } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/SimpleRateLimiter.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/SimpleRateLimiter.java index b46759d3f1552..c8d5744d59744 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/SimpleRateLimiter.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/SimpleRateLimiter.java @@ -18,10 +18,16 @@ package org.apache.hadoop.fs.azurebfs.utils; +import java.util.concurrent.TimeUnit; import java.util.concurrent.locks.LockSupport; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidConfigurationValueException; +/** + * A simple rate limiter that allows a specified number of permits + * per second.
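+ * For example, with three permits per second, successive {@code acquire} + * calls are spaced roughly 333 ms apart.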
This implementation uses basic synchronization and + * LockSupport for waiting. + */ public final class SimpleRateLimiter { // Interval between permits in nanoseconds. @@ -52,9 +58,18 @@ public SimpleRateLimiter(int permitsPerSecond) } /** - * Acquires a permit from the rate limiter, blocking until one is available. + * Acquires a permit from the rate limiter, waiting up to the + * specified timeout if necessary. + * + * @param timeout Maximum time to wait for a permit. + * @param unit Time unit of the timeout argument. */ - public synchronized void acquire() { + public synchronized void acquire(long timeout, TimeUnit unit) { + if (timeout <= 0) { + return; + } + + final long deadline = System.nanoTime() + unit.toNanos(timeout); while (true) { long now = System.nanoTime(); long wait = nextAllowedTime - now; @@ -64,7 +79,12 @@ public synchronized void acquire() { return; } - LockSupport.parkNanos(wait); + long remaining = deadline - now; + if (remaining <= 0) { + return; // timeout expired + } + + LockSupport.parkNanos(Math.min(wait, remaining)); if (Thread.interrupted()) { Thread.currentThread().interrupt(); diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingContext.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingContext.java index 265d72314471b..290a63bc014e1 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingContext.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingContext.java @@ -241,7 +241,7 @@ public void constructHeader(AbfsHttpOperation httpOperation, String previousFail + operatedBlobCount + COLON + getOperationSpecificHeader(opType) + COLON + httpOperation.getTracingContextSuffix() + COLON - + metricResults + COLON + resourceUtilizationMetricResults; + + resourceUtilizationMetricResults; break; case TWO_ID_FORMAT: header = TracingHeaderVersion.getCurrentVersion() + COLON diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingHeaderFormat.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingHeaderFormat.java index 55c45f3276575..a8a22fcd0c390 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingHeaderFormat.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingHeaderFormat.java @@ -26,6 +26,5 @@ public enum TracingHeaderFormat { ALL_ID_FORMAT, // :: // :::: - AGGREGATED_METRICS_FORMAT; // : - // ::: } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingHeaderVersion.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingHeaderVersion.java index dc05e8f251ee5..d989b79250c9b 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingHeaderVersion.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingHeaderVersion.java @@ -45,11 +45,16 @@ public enum TracingHeaderVersion { * Schema: version:clientCorrelationId:clientRequestId:fileSystemId * :primaryRequestId:streamId:opType:retryHeader:ingressHandler * :position:operatedBlobCount:operationSpecificHeader:httpOperationHeader - * :aggregatedMetrics:resourceUtilizationMetrics + * :resourceUtilizationMetrics */ - V2("v2", 15), + V2("v2", 14), /** - * Metrics to client request id header. 
+ * Version 0 of the aggregated metrics tracing header, which consists of + * a version prefix followed by 3 permanent fields. + * Schema: metricsVersion:List<AggregatedMetrics>, + * where AggregatedMetrics = clientCorrelationId:fileSystemId:aggregated-metrics; + * each AggregatedMetrics entry is enclosed within [] and entries are separated by : + */ AV0("av0", 3); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsReadFooterMetrics.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsReadFooterMetrics.java index 81a6b2d35f90f..ac229afd6558e 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsReadFooterMetrics.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsReadFooterMetrics.java @@ -21,7 +21,7 @@ import static org.apache.hadoop.fs.CommonConfigurationKeys.IOSTATISTICS_LOGGING_LEVEL_INFO; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_READ_BUFFER_SIZE; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_WRITE_BUFFER_SIZE; -import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRIC_FORMAT; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRICS_FORMAT; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.MIN_BUFFER_SIZE; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.ONE_KB; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.ONE_MB; @@ -78,7 +78,7 @@ public void testReadFooterMetricsWithParquetAndNonParquet() throws Exception { */ private Configuration getConfiguration(int bufferSize) { final Configuration configuration = getRawConfiguration(); - configuration.set(FS_AZURE_METRIC_FORMAT, String.valueOf(MetricFormat.INTERNAL_FOOTER_METRIC_FORMAT)); + configuration.set(FS_AZURE_METRICS_FORMAT, String.valueOf(MetricFormat.INTERNAL_FOOTER_METRIC_FORMAT)); configuration.setInt(AZURE_READ_BUFFER_SIZE, bufferSize); configuration.setInt(AZURE_WRITE_BUFFER_SIZE, bufferSize); return configuration; } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java index 4ac364b3df8ff..635925bdc0b68 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java @@ -27,7 +27,6 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; -import java.util.UUID; import java.util.concurrent.TimeUnit; import java.util.regex.Pattern; @@ -41,6 +40,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; @@ -56,6 +56,7 @@ import org.apache.hadoop.fs.azurebfs.constants.HttpOperationType; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsApacheHttpExpect100Exception; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidUriException; import
org.apache.hadoop.fs.azurebfs.contracts.exceptions.TokenAccessProviderException; import org.apache.hadoop.fs.azurebfs.contracts.services.AppendRequestParameters; import org.apache.hadoop.fs.azurebfs.enums.AbfsBackoffMetricsEnum; @@ -66,6 +67,7 @@ import org.apache.hadoop.fs.azurebfs.utils.TracingHeaderFormat; import org.apache.hadoop.fs.azurebfs.utils.UriUtils; import org.apache.hadoop.security.ssl.DelegatingSSLSocketFactory; +import org.apache.hadoop.test.LambdaTestUtils; import org.apache.hadoop.test.ReflectionUtils; import org.apache.http.HttpClientConnection; import org.apache.http.HttpResponse; @@ -89,9 +91,9 @@ import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ALWAYS_USE_HTTPS; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRICS_COLLECTION_ENABLED; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRICS_EMIT_THRESHOLD_INTERVAL_SECS; -import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRIC_ACCOUNT_KEY; -import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRIC_ACCOUNT_NAME; -import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRIC_FORMAT; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRICS_ACCOUNT_KEY; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRICS_ACCOUNT_NAME; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRICS_FORMAT; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRICS_EMIT_THRESHOLD; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.JAVA_VENDOR; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.JAVA_VERSION; @@ -199,10 +201,10 @@ private String getUserAgentString(AbfsConfiguration config, AbfsClient client; if (AbfsServiceType.DFS.equals(config.getFsConfiguredServiceType())) { client = new AbfsDfsClient(new URL("https://azure.com"), null, - config, (AccessTokenProvider) null, null, null, abfsClientContext, UUID.randomUUID().toString()); + config, (AccessTokenProvider) null, null, null, abfsClientContext); } else { client = new AbfsBlobClient(new URL("https://azure.com"), null, - config, (AccessTokenProvider) null, null, null, abfsClientContext, UUID.randomUUID().toString()); + config, (AccessTokenProvider) null, null, null, abfsClientContext); } String sslProviderName = null; if (includeSSLProvider) { @@ -457,7 +459,7 @@ public static AbfsClient createTestClientFromCurrentContext( : null), null, null, - abfsClientContext, UUID.randomUUID().toString()); + abfsClientContext); } else { testClient = new AbfsBlobClient( baseAbfsClientInstance.getBaseUrl(), @@ -473,7 +475,7 @@ public static AbfsClient createTestClientFromCurrentContext( : null), null, null, - abfsClientContext, UUID.randomUUID().toString()); + abfsClientContext); } return testClient; @@ -511,7 +513,7 @@ public static AbfsClient createBlobClientFromCurrentContext( : null), null, null, - abfsClientContext, UUID.randomUUID().toString()); + abfsClientContext); return testClient; } @@ -907,9 +909,9 @@ public void testMetricAccountFallback() throws Exception { configuration.setBoolean( AZURE_CREATE_REMOTE_FILESYSTEM_DURING_INITIALIZATION, false); configuration.setBoolean(FS_AZURE_ACCOUNT_IS_HNS_ENABLED, false); - configuration.unset(FS_AZURE_METRIC_ACCOUNT_NAME); - configuration.unset(FS_AZURE_METRIC_ACCOUNT_KEY); - 
configuration.unset(FS_AZURE_METRIC_FORMAT);
+    configuration.unset(FS_AZURE_METRICS_ACCOUNT_NAME);
+    configuration.unset(FS_AZURE_METRICS_ACCOUNT_KEY);
+    configuration.unset(FS_AZURE_METRICS_FORMAT);
     configuration.setBoolean(FS_AZURE_ALWAYS_USE_HTTPS, true);
     final AzureBlobFileSystem fs = getFileSystem(configuration);
     Assertions.assertThat(
@@ -920,7 +922,7 @@ public void testMetricAccountFallback() throws Exception {
         .isEqualTo(MetricFormat.INTERNAL_METRIC_FORMAT);

     Assertions.assertThat(
-        fs.getAbfsStore().getClient().isMetricCollectionEnabled())
+        fs.getAbfsStore().getClient().getAbfsMetricsManager().isMetricCollectionEnabled())
         .describedAs(
             "Metric collection should be enabled even if metric account is not set")
         .isTrue();
@@ -943,13 +945,48 @@ public void testMetricAccountFallback() throws Exception {
     uriBuilder.setScheme(FileSystemUriSchemes.HTTPS_SCHEME);
     uriBuilder.setHost(fs.getUri().getHost());
     uriBuilder.setPath(FORWARD_SLASH);
-    Assertions.assertThat(fs.getAbfsStore().getClient().getMetricsUrl())
+    Assertions.assertThat(fs.getAbfsStore().getClient().getAbfsMetricsManager().getMetricsUrl())
         .describedAs(
             "In case metric account is not set, account URL should be used")
         .isEqualTo(
             UriUtils.changeUrlFromBlobToDfs(uriBuilder.build().toURL()));
   }

+  @Test
+  public void testInvalidMetricAccount() throws Exception {
+    Configuration configuration = getRawConfiguration();
+    configuration.setBoolean(
+        AZURE_CREATE_REMOTE_FILESYSTEM_DURING_INITIALIZATION, false);
+    configuration.setBoolean(FS_AZURE_ACCOUNT_IS_HNS_ENABLED, false);
+    configuration.set(FS_AZURE_METRICS_ACCOUNT_NAME, "invalidAccountName!");
+    configuration.set(FS_AZURE_METRICS_ACCOUNT_KEY, "invalidAccountKey!");
+    configuration.unset(FS_AZURE_METRICS_FORMAT); // Use default metric format
+    // Filesystem initialization should not fail if metric account is invalid
+    try (AzureBlobFileSystem fs = getFileSystem(configuration)) {
+      // In case metric account is invalid, metric collection should be disabled
+      Assertions.assertThat(
+          fs.getAbfsStore()
+              .getClient()
+              .getAbfsMetricsManager()
+              .isMetricCollectionEnabled())
+          .describedAs(
+              "In case metric account is invalid, metric collection should be disabled")
+          .isFalse();
+      fs.create(new Path("/testPath"));
+      FileStatus fileStatus = fs.getFileStatus(new Path("/testPath"));
+      Assertions.assertThat(fileStatus)
+          .describedAs("FileStatus should be returned for the created path")
+          .isNotNull();
+      // The counters string should contain neither backoff (#BO) nor footer (#FO) metrics
+      Assertions.assertThat(
+          fs.getAbfsStore().getClient().getAbfsCounters().toString())
+          .describedAs(
+              "In case metric account is invalid, no metrics should be collected")
+          .doesNotContain("#BO:")
+          .doesNotContain("#FO:");
+    }
+  }
+
  /**
   * Test to verify that in case metric format is set to empty,
   * metric collection is disabled. 
@@ -963,13 +1000,13 @@ public void testMetricCollectionWithDifferentMetricFormat() throws Exception {
     configuration.setBoolean(
         AZURE_CREATE_REMOTE_FILESYSTEM_DURING_INITIALIZATION, true);
     configuration.setBoolean(FS_AZURE_ACCOUNT_IS_HNS_ENABLED, false);
-    configuration.unset(FS_AZURE_METRIC_FORMAT);
-    configuration.setEnum(FS_AZURE_METRIC_FORMAT,
+    configuration.unset(FS_AZURE_METRICS_FORMAT);
+    configuration.setEnum(FS_AZURE_METRICS_FORMAT,
         INTERNAL_BACKOFF_METRIC_FORMAT);
     final AzureBlobFileSystem fs = getFileSystem(configuration);
     int totalCalls = 1; // Filesystem initialization call
     Assertions.assertThat(
-        fs.getAbfsStore().getClient().isMetricCollectionEnabled())
+        fs.getAbfsStore().getClient().getAbfsMetricsManager().isMetricCollectionEnabled())
         .describedAs("Metric collection should be enabled by default")
         .isTrue();

@@ -1047,13 +1084,13 @@ public void testGetMetricsCallMethod() throws Exception {
         fs.getAbfsStore()).getAbfsRestOperation(
         AbfsRestOperationType.GetFileSystemProperties,
         HTTP_METHOD_HEAD,
-        fs.getAbfsStore().getClient().getMetricsUrl(),
+        fs.getAbfsStore().getClient().getAbfsMetricsManager().getMetricsUrl(),
         getTestRequestHeaders(fs.getAbfsStore().getClient()))
         .createHttpOperation();
     tracingContext.constructHeader(abfsHttpOperation, null,
         EXPONENTIAL_RETRY_POLICY_ABBREVIATION);
     assertThat(abfsHttpOperation.getClientRequestId())
-        .describedAs("ClientRequestId should be contains Backoff metrics")
+        .describedAs("ClientRequestId should contain Backoff metrics")
         .contains("#BO:");
   }

@@ -1173,7 +1210,7 @@ public void testAggregateMetricsConfigs() throws Exception {
     // Disabling the aggregate metrics collection
     configuration.setBoolean(FS_AZURE_METRICS_COLLECTION_ENABLED, false);
     AzureBlobFileSystem fs = this.getFileSystem(configuration);
-    Assertions.assertThat(fs.getAbfsStore().getClient().getMetricsEmitScheduler())
+    Assertions.assertThat(fs.getAbfsStore().getClient().getAbfsMetricsManager().getMetricsEmitScheduler())
         .describedAs("Since metrics collection is not enabled, "
             + "scheduler should not be initialised")
         .isNull();
@@ -1181,7 +1218,7 @@ public void testAggregateMetricsConfigs() throws Exception {
     // Enabling the aggregate metrics collection
     configuration.setBoolean(FS_AZURE_METRICS_COLLECTION_ENABLED, true);
     fs = this.getFileSystem(configuration);
-    Assertions.assertThat(fs.getAbfsStore().getClient().getMetricsEmitScheduler())
+    Assertions.assertThat(fs.getAbfsStore().getClient().getAbfsMetricsManager().getMetricsEmitScheduler())
         .describedAs("Since metrics collection is enabled, "
             + "scheduler should be initialised")
         .isNotNull();
@@ -1314,7 +1351,7 @@ public void testStaleConnectionBehavior() throws Exception {
     Configuration conf = fs.getConf();

     // This is to avoid actual metric calls during the test
-    conf.unset(FS_AZURE_METRIC_ACCOUNT_NAME);
+    conf.unset(FS_AZURE_METRICS_ACCOUNT_NAME);

     // Initialize the file system
     AzureBlobFileSystemStore store = this.getFileSystem(conf).getAbfsStore();
@@ -1561,6 +1598,4 @@ private void checkConnectionReuse(AbfsClient abfsClient) throws IOException {
           .isEqualTo(keepAliveCache.peekLast());
     }
   }
-
-
 }
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestApacheClientConnectionPool.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestApacheClientConnectionPool.java
index db5f596bad828..422d4cc945bb9 100644
--- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestApacheClientConnectionPool.java
+++ 
b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestApacheClientConnectionPool.java @@ -51,7 +51,7 @@ import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EMPTY_STRING; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.JDK_FALLBACK; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.KEEP_ALIVE_CACHE_CLOSED; -import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRIC_FORMAT; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRICS_FORMAT; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_NETWORKING_LIBRARY; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemUriSchemes.HTTPS_SCHEME; import static org.apache.hadoop.fs.azurebfs.constants.HttpOperationType.APACHE_HTTP_CLIENT; @@ -74,7 +74,7 @@ public ITestApacheClientConnectionPool() throws Exception { public void testKacIsClosed() throws Throwable { Configuration configuration = new Configuration(getRawConfiguration()); configuration.set(FS_AZURE_NETWORKING_LIBRARY, APACHE_HTTP_CLIENT.name()); - configuration.unset(FS_AZURE_METRIC_FORMAT); + configuration.unset(FS_AZURE_METRICS_FORMAT); try (AzureBlobFileSystem fs = (AzureBlobFileSystem) FileSystem.newInstance( configuration)) { KeepAliveCache kac = fs.getAbfsStore() diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsClient.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsClient.java index 0db7f51986db3..8b0d7d728f562 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsClient.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsClient.java @@ -31,9 +31,9 @@ import org.apache.hadoop.fs.azurebfs.MockIntercept; import org.apache.hadoop.fs.azurebfs.oauth2.AccessTokenProvider; -import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRIC_FORMAT; -import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_SHOULD_EMIT_METRICS_ON_IDLE_TIME; -import static org.apache.hadoop.fs.azurebfs.services.AbfsClient.ABFS_CLIENT_TIMER_THREAD_NAME; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRICS_FORMAT; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRICS_SHOULD_EMIT_ON_IDLE_TIME; +import static org.apache.hadoop.fs.azurebfs.services.AbfsMetricsManager.ABFS_CLIENT_TIMER_THREAD_NAME; import static org.assertj.core.api.Assertions.assertThat; import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.doAnswer; @@ -57,11 +57,12 @@ public class TestAbfsClient { public void testTimerInitializationWithoutMetricCollection() throws Exception { final Configuration configuration = new Configuration(); AbfsConfiguration abfsConfiguration = new AbfsConfiguration(configuration, ACCOUNT_NAME); - abfsConfiguration.unset(FS_AZURE_METRIC_FORMAT); - configuration.setBoolean(FS_AZURE_SHOULD_EMIT_METRICS_ON_IDLE_TIME, false); + abfsConfiguration.unset(FS_AZURE_METRICS_FORMAT); + configuration.setBoolean(FS_AZURE_METRICS_SHOULD_EMIT_ON_IDLE_TIME, false); AbfsCounters abfsCounters = spy(new AbfsCountersImpl(new URI("abcd"))); - AbfsClientContext abfsClientContext = new AbfsClientContextBuilder().withAbfsCounters(abfsCounters).build(); + AbfsClientContext abfsClientContext = new 
AbfsClientContextBuilder().withAbfsCounters(abfsCounters) + .withFileSystemId(UUID.randomUUID().toString()).build(); // Get an instance of AbfsClient. AbfsClient client = new AbfsDfsClient(new URL("https://" + ACCOUNT_NAME + "/"), @@ -70,9 +71,9 @@ public void testTimerInitializationWithoutMetricCollection() throws Exception { (AccessTokenProvider) null, null, null, - abfsClientContext, UUID.randomUUID().toString()); + abfsClientContext); - assertThat(client.getTimer()) + assertThat(client.getAbfsMetricsManager().getTimer()) .describedAs("Timer should not be initialized") .isNull(); @@ -92,11 +93,12 @@ public void testTimerInitializationWithoutMetricCollection() throws Exception { @Test public void testTimerInitializationWithMetricCollection() throws Exception { final Configuration configuration = new Configuration(); - configuration.setBoolean(FS_AZURE_SHOULD_EMIT_METRICS_ON_IDLE_TIME, true); + configuration.setBoolean(FS_AZURE_METRICS_SHOULD_EMIT_ON_IDLE_TIME, true); AbfsConfiguration abfsConfiguration = new AbfsConfiguration(configuration, ACCOUNT_NAME); AbfsCounters abfsCounters = spy(new AbfsCountersImpl(new URI("abcd"))); - AbfsClientContext abfsClientContext = new AbfsClientContextBuilder().withAbfsCounters(abfsCounters).build(); + AbfsClientContext abfsClientContext = new AbfsClientContextBuilder().withAbfsCounters(abfsCounters) + .withFileSystemId(UUID.randomUUID().toString()).build(); // Get an instance of AbfsClient. AbfsClient client = new AbfsDfsClient(new URL("https://" + ACCOUNT_NAME + "/"), @@ -105,9 +107,9 @@ public void testTimerInitializationWithMetricCollection() throws Exception { (AccessTokenProvider) null, null, null, - abfsClientContext, UUID.randomUUID().toString()); + abfsClientContext); - assertThat(client.getTimer()) + assertThat(client.getAbfsMetricsManager().getTimer()) .describedAs("Timer should be initialized") .isNotNull(); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRestOperation.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRestOperation.java index a15c16118ef96..352108de11f51 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRestOperation.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRestOperation.java @@ -26,7 +26,7 @@ import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_DELETE; import static org.apache.hadoop.fs.azurebfs.enums.AbfsBackoffMetricsEnum.NUMBER_OF_REQUESTS_FAILED; import static org.apache.hadoop.fs.azurebfs.services.AbfsRestOperationType.DeletePath; -import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRIC_FORMAT; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRICS_FORMAT; import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem; import org.apache.hadoop.fs.azurebfs.AbstractAbfsIntegrationTest; import java.util.ArrayList; @@ -51,7 +51,7 @@ public TestAbfsRestOperation() throws Exception { public void testBackoffRetryMetrics() throws Exception { // Create an AzureBlobFileSystem instance. 
final Configuration configuration = getRawConfiguration(); - configuration.set(FS_AZURE_METRIC_FORMAT, String.valueOf(MetricFormat.INTERNAL_BACKOFF_METRIC_FORMAT)); + configuration.set(FS_AZURE_METRICS_FORMAT, String.valueOf(MetricFormat.INTERNAL_BACKOFF_METRIC_FORMAT)); final AzureBlobFileSystem fs = (AzureBlobFileSystem) FileSystem.newInstance(configuration); AbfsConfiguration abfsConfiguration = fs.getAbfsStore().getAbfsConfiguration(); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAggregateMetricsManager.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAggregateMetricsManager.java index f54c8ebe44fa1..6e7c5fe609426 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAggregateMetricsManager.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAggregateMetricsManager.java @@ -30,6 +30,7 @@ import java.util.List; import java.util.UUID; import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; @@ -45,12 +46,15 @@ public class TestAggregateMetricsManager extends AbstractAbfsIntegrationTest { - /** Number of nanoseconds in one millisecond. */ + // Number of nanoseconds in one millisecond. private static final long NANOS_PER_MILLISECOND = 1_000_000L; // The manager under test private final AggregateMetricsManager manager; + // Rate limit permits per second for testing + private final int permitsPerSecond = 3; + /** * Constructor for TestAggregateMetricsManager. * @@ -58,7 +62,7 @@ public class TestAggregateMetricsManager extends AbstractAbfsIntegrationTest { */ public TestAggregateMetricsManager() throws Exception { super(); - manager = AggregateMetricsManager.get(10, 3); + manager = AggregateMetricsManager.getInstance(10, permitsPerSecond); } /** @@ -314,9 +318,8 @@ public void testMultipleMetricCallsInCaseDataIsMoreThanBufferSize() @Test public void testRateLimitMetricCalls() throws IOException, InterruptedException { - final int permitsPerSecond = 3; final long minIntervalMs = 1_000 / permitsPerSecond; // 333ms - final double toleranceMs = 15; // allow 15ms jitter + final double toleranceMs = 50; // allow 50ms jitter final int numClients = 10; // Store timestamps for each client @@ -373,9 +376,6 @@ public void testRateLimitMetricCalls() @Test public void testAggregatedMetricsManagerWithJVMExit0() throws IOException, InterruptedException { - // ------------------------------- - // Program 1 (kept exactly as you asked) - // ------------------------------- String program = "import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem;\n" + "import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystemStore;\n" @@ -392,7 +392,7 @@ public void testAggregatedMetricsManagerWithJVMExit0() + "public class ShutdownTestProg {\n" + " public static void main(String[] args) throws Exception {\n" + " AtomicInteger calls1 = new AtomicInteger();\n" - + " AggregateMetricsManager mgr = AggregateMetricsManager.get(10, 3);\n" + + " AggregateMetricsManager mgr = AggregateMetricsManager.getInstance(10, 3);\n" + "\n" + " URI uri = new URI(\"abfss://test@manishtestfnsnew.dfs.core.windows.net\");\n" + " Configuration config = new Configuration();\n" @@ -437,9 +437,6 @@ public void testAggregatedMetricsManagerWithJVMExit0() @Test public void testAggregatedMetricsManagerWithJVMExit1() throws IOException, InterruptedException { - // 
------------------------------- - // Program 2 (final, multi-FS version) - // ------------------------------- String program = "import org.apache.hadoop.fs.azurebfs.services.*;\n" + "import org.apache.hadoop.fs.azurebfs.utils.*;\n" @@ -455,7 +452,7 @@ public void testAggregatedMetricsManagerWithJVMExit1() + "\n" + "public class ShutdownTestProg {\n" + " public static void main(String[] args) throws Exception {\n" - + " AggregateMetricsManager mgr = AggregateMetricsManager.get(10, 3);\n" + + " AggregateMetricsManager mgr = AggregateMetricsManager.getInstance(10, 3);\n" + "\n" + " AtomicInteger calls1 = new AtomicInteger();\n" + " AtomicInteger calls2 = new AtomicInteger();\n" @@ -521,9 +518,6 @@ public void testAggregatedMetricsManagerWithJVMExit1() @Test void testAggregatedMetricsManagerWithJVMCrash() throws Exception { final int crashExitCode = 134; - // ------------------------------- - // Program 3 (JVM Crash) - // ------------------------------- String program = "import org.apache.hadoop.fs.azurebfs.services.*;\n" + "import org.apache.hadoop.fs.azurebfs.utils.*;\n" @@ -541,7 +535,7 @@ void testAggregatedMetricsManagerWithJVMCrash() throws Exception { + "public class ShutdownTestProg {\n" + "\n" + " public static void main(String[] args) throws Exception {\n" - + " AggregateMetricsManager mgr = AggregateMetricsManager.get(10, 3);\n" + + " AggregateMetricsManager mgr = AggregateMetricsManager.getInstance(10, 3);\n" + "\n" + " // Track how many times metrics flush\n" + " AtomicInteger calls1 = new AtomicInteger();\n" @@ -616,46 +610,59 @@ private void runProgramAndCaptureOutput(String program, boolean expectMetricsFlush, int expectedExitCode) throws IOException, InterruptedException { Path tempFile = Files.createTempFile("ShutdownTestProg", ".java"); - Files.write(tempFile, program.getBytes(StandardCharsets.UTF_8)); - - Path javaFile = tempFile.getParent().resolve("ShutdownTestProg.java"); - Files.move(tempFile, javaFile, StandardCopyOption.REPLACE_EXISTING); - - Process javac = new ProcessBuilder( - "javac", - "-classpath", System.getProperty("java.class.path"), - javaFile.toAbsolutePath().toString()) - .redirectErrorStream(true) - .start(); - - String compileOutput = readProcessOutput(javac); - javac.waitFor(); - - Assertions.assertThat(javac.exitValue()) - .withFailMessage("Compilation failed:\n" + compileOutput) - .isEqualTo(0); - - String classpath = javaFile.getParent().toAbsolutePath() - + File.pathSeparator - + System.getProperty("java.class.path"); - - Process javaProc = new ProcessBuilder("java", - "-XX:ErrorFile=/tmp/no_hs_err_%p.log", - "-classpath", classpath, - "ShutdownTestProg") - .redirectErrorStream(true) - .start(); - - String output = readProcessOutput(javaProc); - int exitCode = javaProc.waitFor(); - - Assertions.assertThat(output).contains("BEFORE_EXIT"); - Assertions.assertThat(exitCode).isEqualTo(expectedExitCode); - - if (expectMetricsFlush) { - Assertions.assertThat(output).contains("FLUSH:"); - } else { - Assertions.assertThat(output).doesNotContain("FLUSH:"); + try { + Files.write(tempFile, program.getBytes(StandardCharsets.UTF_8)); + + Path javaFile = tempFile.getParent().resolve("ShutdownTestProg.java"); + Files.move(tempFile, javaFile, StandardCopyOption.REPLACE_EXISTING); + + Process javac = new ProcessBuilder( + "javac", + "-classpath", System.getProperty("java.class.path"), + javaFile.toAbsolutePath().toString()) + .redirectErrorStream(true) + .start(); + + String compileOutput = readProcessOutput(javac); + javac.waitFor(); + if (!javac.waitFor(30, 
TimeUnit.SECONDS)) {
+        javac.destroyForcibly();
+        throw new AssertionError("javac process timed out");
+      }
+
+      Assertions.assertThat(javac.exitValue())
+          .withFailMessage("Compilation failed:\n" + compileOutput)
+          .isEqualTo(0);
+
+      String classpath = javaFile.getParent().toAbsolutePath()
+          + File.pathSeparator
+          + System.getProperty("java.class.path");
+
+      Process javaProc = new ProcessBuilder("java",
+          "-XX:ErrorFile=/tmp/no_hs_err_%p.log",
+          "-classpath", classpath,
+          "ShutdownTestProg")
+          .redirectErrorStream(true)
+          .start();
+
+      String output = readProcessOutput(javaProc);
+      int exitCode;
+      if (!javaProc.waitFor(30, TimeUnit.SECONDS)) {
+        javaProc.destroyForcibly();
+        throw new AssertionError("java process timed out");
+      }
+      exitCode = javaProc.exitValue();
+
+      Assertions.assertThat(output).contains("BEFORE_EXIT");
+      Assertions.assertThat(exitCode).isEqualTo(expectedExitCode);
+
+      if (expectMetricsFlush) {
+        Assertions.assertThat(output).contains("FLUSH:");
+      } else {
+        Assertions.assertThat(output).doesNotContain("FLUSH:");
+      }
+    } finally {
+      Files.deleteIfExists(tempFile.resolveSibling("ShutdownTestProg.java")); // tempFile was moved here above
     }
   }
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/TestSimpleRateLimiter.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/TestSimpleRateLimiter.java
index cee857e39282e..aa2481fc19d58 100644
--- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/TestSimpleRateLimiter.java
+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/TestSimpleRateLimiter.java
@@ -19,6 +19,7 @@
 package org.apache.hadoop.fs.azurebfs.utils;

 import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
 import java.util.concurrent.locks.LockSupport;

 import org.assertj.core.api.Assertions;
@@ -45,12 +46,12 @@ void testNoWaitWhenSpacedOut() throws InvalidConfigurationValueException {
     // 2 permits per second → 500 ms interval
     SimpleRateLimiter limiter = new SimpleRateLimiter(2);

-    limiter.acquire();
+    limiter.acquire(5, TimeUnit.SECONDS);
     // Sleep longer than required interval
     LockSupport.parkNanos(millisToSleep * NANOS_PER_MILLISECOND);

     long before = System.nanoTime();
-    limiter.acquire(); // Should not block
+    limiter.acquire(5, TimeUnit.SECONDS); // Should not block
     long after = System.nanoTime();

     long elapsed = after - before;
@@ -76,10 +77,10 @@ void testRateLimitingDelay() throws InvalidConfigurationValueException {
     // 5 permits per second → 200ms interval
     SimpleRateLimiter limiter = new SimpleRateLimiter(permitsPerSecond);

-    limiter.acquire(); // First call never waits
+    limiter.acquire(5, TimeUnit.SECONDS); // First call never waits

     long before = System.nanoTime();
-    limiter.acquire(); // Second call immediately → should wait ~200ms
+    limiter.acquire(5, TimeUnit.SECONDS); // Second call immediately → should wait ~200ms
     long after = System.nanoTime();

     long elapsedMs = (after - before) / NANOS_PER_MILLISECOND;
@@ -114,7 +115,7 @@ void testMultipleBurstCalls()
     for (int i = 0; i < 5; i++) {
       new Thread(() -> {
         try {
-          limiter.acquire();
+          limiter.acquire(5, TimeUnit.SECONDS);
         } finally {
           latch.countDown();
         }
@@ -155,7 +156,7 @@ void testMultipleBurstCallsWhenPermitIsLess()
     long start = System.nanoTime();

     for (int i = 0; i < 10; i++) {
-      limiter.acquire();
+      limiter.acquire(5, TimeUnit.SECONDS);
     }

     long end = System.nanoTime();

From 3f8af3d2d6f9c5281ca305319c4f2e445e5ac54d Mon Sep 17 00:00:00 2001
From: bhattmanish98
Date: Wed, 7 Jan 2026 19:52:27 -0800
Subject: [PATCH 08/10] Checkstyle Fixed
--- .../apache/hadoop/fs/azurebfs/services/AbfsClient.java | 2 +- .../hadoop/fs/azurebfs/services/AbfsMetricsManager.java | 8 ++++---- .../hadoop/fs/azurebfs/services/ITestAbfsClient.java | 2 -- .../fs/azurebfs/services/TestAggregateMetricsManager.java | 5 +++-- 4 files changed, 8 insertions(+), 9 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java index 00898e21f00ae..f76716207a356 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java @@ -339,7 +339,7 @@ public AbfsClient(final URL baseUrl, final AbfsServiceType abfsServiceType) throws IOException { this(baseUrl, sharedKeyCredentials, abfsConfiguration, - encryptionContextProvider, abfsClientContext,abfsServiceType); + encryptionContextProvider, abfsClientContext, abfsServiceType); this.sasTokenProvider = sasTokenProvider; this.tokenProvider = tokenProvider; } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsMetricsManager.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsMetricsManager.java index 1751e0fdb931e..39c6c82d7134b 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsMetricsManager.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsMetricsManager.java @@ -91,16 +91,16 @@ public class AbfsMetricsManager { private ScheduledExecutorService metricsEmitScheduler = null; // AbfsConfiguration instance - final AbfsConfiguration abfsConfiguration; + private final AbfsConfiguration abfsConfiguration; // AbfsCounters instance - final AbfsCounters abfsCounters; + private final AbfsCounters abfsCounters; // File system ID - final String fileSystemId; + private final String fileSystemId; // Storage account name - final String accountName; + private final String accountName; /** * Constructor for AbfsMetricsManager. 
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java
index 635925bdc0b68..4a4c9628fc2d3 100644
--- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java
+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java
@@ -56,7 +56,6 @@
 import org.apache.hadoop.fs.azurebfs.constants.HttpOperationType;
 import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsApacheHttpExpect100Exception;
 import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException;
-import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidUriException;
 import org.apache.hadoop.fs.azurebfs.contracts.exceptions.TokenAccessProviderException;
 import org.apache.hadoop.fs.azurebfs.contracts.services.AppendRequestParameters;
 import org.apache.hadoop.fs.azurebfs.enums.AbfsBackoffMetricsEnum;
@@ -67,7 +66,6 @@
 import org.apache.hadoop.fs.azurebfs.utils.TracingHeaderFormat;
 import org.apache.hadoop.fs.azurebfs.utils.UriUtils;
 import org.apache.hadoop.security.ssl.DelegatingSSLSocketFactory;
-import org.apache.hadoop.test.LambdaTestUtils;
 import org.apache.hadoop.test.ReflectionUtils;
 import org.apache.http.HttpClientConnection;
 import org.apache.http.HttpResponse;
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAggregateMetricsManager.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAggregateMetricsManager.java
index 6e7c5fe609426..6058f95687137 100644
--- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAggregateMetricsManager.java
+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAggregateMetricsManager.java
@@ -609,6 +609,7 @@ void testAggregatedMetricsManagerWithJVMCrash() throws Exception {
   private void runProgramAndCaptureOutput(String program,
       boolean expectMetricsFlush, int expectedExitCode)
       throws IOException, InterruptedException {
+    final long waitTimeInSeconds = 30;
     Path tempFile = Files.createTempFile("ShutdownTestProg", ".java");
     try {
       Files.write(tempFile, program.getBytes(StandardCharsets.UTF_8));
@@ -625,7 +626,7 @@ private void runProgramAndCaptureOutput(String program,

       String compileOutput = readProcessOutput(javac);
       javac.waitFor();
-      if (!javac.waitFor(30, TimeUnit.SECONDS)) {
+      if (!javac.waitFor(waitTimeInSeconds, TimeUnit.SECONDS)) {
         javac.destroyForcibly();
         throw new AssertionError("javac process timed out");
       }
@@ -647,7 +648,7 @@ private void runProgramAndCaptureOutput(String program,

       String output = readProcessOutput(javaProc);
       int exitCode;
-      if (!javaProc.waitFor(30, TimeUnit.SECONDS)) {
+      if (!javaProc.waitFor(waitTimeInSeconds, TimeUnit.SECONDS)) {
         javaProc.destroyForcibly();
         throw new AssertionError("java process timed out");
       }

From fbd203fa98e588ba223c3078d7506631d41b9d7b Mon Sep 17 00:00:00 2001
From: bhattmanish98
Date: Wed, 7 Jan 2026 23:12:14 -0800
Subject: [PATCH 09/10] Implemented AbfsMetricsManager with Closeable interface

---
 .../fs/azurebfs/services/AbfsClient.java      | 12 +++++-----
 .../azurebfs/services/AbfsMetricsManager.java | 22 +++++++++++--------
 2 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java 
b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java
index f76716207a356..f3360e8b67164 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java
@@ -274,7 +274,7 @@ private AbfsClient(final URL baseUrl,
     this.abfsMetricsManager = new AbfsMetricsManager(abfsConfiguration, abfsCounters,
         baseUrlString, indexLastForwardSlash, accountName, fileSystemId);
     // Register the client with the AggregateMetricsManager
-    getAbfsMetricsManager().getAggregateMetricsManager()
+    abfsMetricsManager.getAggregateMetricsManager()
         .registerClient(accountName, this);

     // Initialize write thread pool metrics if dynamic write thread pool scaling is enabled.
@@ -346,9 +346,11 @@ public AbfsClient(final URL baseUrl,

   @Override
   public void close() throws IOException {
-    getAbfsMetricsManager().closeMetricsResources();
-    getAbfsMetricsManager().getAggregateMetricsManager()
-        .deregisterClient(accountName, this);
+    if (abfsMetricsManager != null) {
+      abfsMetricsManager.close();
+      abfsMetricsManager.getAggregateMetricsManager()
+          .deregisterClient(accountName, this);
+    }
     if (keepAliveCache != null) {
       keepAliveCache.close();
     }
@@ -1556,7 +1558,7 @@ public void getMetricCall(String metricsData) throws IOException {

     // Construct the URL for the metric call
     // In case of blob storage, the URL is changed to DFS URL
-    final URL url = createRequestUrl(getAbfsMetricsManager().getMetricsUrl(),
+    final URL url = createRequestUrl(abfsMetricsManager.getMetricsUrl(),
         EMPTY_STRING, abfsUriQueryBuilder.toString());
     final AbfsRestOperation op = getAbfsRestOperation(
         AbfsRestOperationType.GetFileSystemProperties,
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsMetricsManager.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsMetricsManager.java
index 39c6c82d7134b..9c02114b55681 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsMetricsManager.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsMetricsManager.java
@@ -18,6 +18,7 @@
 package org.apache.hadoop.fs.azurebfs.services;

+import java.io.Closeable;
 import java.io.IOException;
 import java.net.URL;
 import java.util.Timer;
@@ -47,7 +48,7 @@
  * AbfsMetricsManager is responsible for managing metrics collection
  * and emission for an AbfsClient instance.
  */
-public class AbfsMetricsManager {
+public class AbfsMetricsManager implements Closeable {

   // Timer thread name for AbfsMetricsManager
   public static final String ABFS_CLIENT_TIMER_THREAD_NAME
@@ -201,15 +202,18 @@ public AbfsMetricsManager(final AbfsConfiguration abfsConfiguration,
    * This method cancels any running timer tasks, shuts down the metrics emission scheduler,
    * and emits any collected metrics before closing.
*/ - public void closeMetricsResources() { + @Override + public void close() { + if (runningTimerTask != null) { + runningTimerTask.cancel(); + } + if (timer != null) { + timer.cancel(); + } + if (metricsEmitScheduler != null && !metricsEmitScheduler.isShutdown()) { + metricsEmitScheduler.shutdownNow(); + } if (isMetricCollectionEnabled()) { - if (runningTimerTask != null) { - runningTimerTask.cancel(); - timer.cancel(); - } - if (metricsEmitScheduler != null && !metricsEmitScheduler.isShutdown()) { - metricsEmitScheduler.shutdownNow(); - } emitCollectedMetrics(); } } From 655dcb1d03ea24095a9cbb5ea9f62a0a8b6ffe40 Mon Sep 17 00:00:00 2001 From: bhattmanish98 Date: Thu, 8 Jan 2026 21:10:10 -0800 Subject: [PATCH 10/10] set timer to null after cancel --- .../apache/hadoop/fs/azurebfs/services/AbfsMetricsManager.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsMetricsManager.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsMetricsManager.java index 9c02114b55681..455d71f9ed8ba 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsMetricsManager.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsMetricsManager.java @@ -206,12 +206,15 @@ public AbfsMetricsManager(final AbfsConfiguration abfsConfiguration, public void close() { if (runningTimerTask != null) { runningTimerTask.cancel(); + runningTimerTask = null; } if (timer != null) { timer.cancel(); + timer = null; } if (metricsEmitScheduler != null && !metricsEmitScheduler.isShutdown()) { metricsEmitScheduler.shutdownNow(); + metricsEmitScheduler = null; } if (isMetricCollectionEnabled()) { emitCollectedMetrics();
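
Patches 09 and 10 together converge on an idempotent close(): every resource is cancelled at most once and its field is nulled out afterwards, so a second close() call (for example, from both AbfsClient.close() and a shutdown path) is a harmless no-op. A minimal standalone sketch of that cancel-and-null pattern; the class and field names below are illustrative, not the actual AbfsMetricsManager members:

  import java.io.Closeable;
  import java.util.Timer;
  import java.util.TimerTask;
  import java.util.concurrent.ScheduledExecutorService;

  // Illustrative only: cancel-and-null each resource so close() is safe to call twice.
  class IdempotentCloserSketch implements Closeable {
    private TimerTask runningTask;              // null if never scheduled
    private Timer timer;                        // null if metrics are disabled
    private ScheduledExecutorService scheduler; // null if aggregation is off

    @Override
    public synchronized void close() {
      if (runningTask != null) {
        runningTask.cancel();
        runningTask = null; // a second close() skips this branch
      }
      if (timer != null) {
        timer.cancel();     // stops the timer thread
        timer = null;
      }
      if (scheduler != null) {
        scheduler.shutdownNow(); // drop pending emission tasks immediately
        scheduler = null;
      }
    }
  }

Nulling after cancel keeps the null checks meaningful on re-entry and lets the cancelled Timer thread be reclaimed promptly.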
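The JVM-exit tests earlier in the series rest on a JVM guarantee: shutdown hooks run on a normal System.exit (status 0 or 1) but never when the process dies abruptly (the crash test's exit code 134 is 128 + SIGABRT). A self-contained illustration of that behavior; the "FLUSH:" and "BEFORE_EXIT" markers mirror the embedded test programs, everything else is hypothetical:

  import java.util.concurrent.atomic.AtomicBoolean;

  // Hooks fire on System.exit(...) with any status, but not on SIGABRT/SIGKILL-style deaths,
  // which is why the exit-0 and exit-1 programs can print "FLUSH:" while a hard crash cannot.
  public class ShutdownFlushSketch {
    private static final AtomicBoolean flushed = new AtomicBoolean(false);

    public static void main(String[] args) {
      Runtime.getRuntime().addShutdownHook(new Thread(() -> {
        if (flushed.compareAndSet(false, true)) {
          System.out.println("FLUSH: emitting buffered metrics once");
        }
      }));
      System.out.println("BEFORE_EXIT");
      System.exit(args.length > 0 ? Integer.parseInt(args[0]) : 0);
    }
  }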
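The TestSimpleRateLimiter changes above move every call to a timed acquire(timeout, unit) signature, and the tests assert the pacing arithmetic: permitsPerSecond p implies a minimum inter-call gap of 1000/p ms (2 permits/s gives 500 ms, 5 permits/s gives 200 ms), with the first call never waiting. A rough sketch of that arithmetic under those assumptions, not the actual SimpleRateLimiter implementation:

  import java.util.concurrent.TimeUnit;
  import java.util.concurrent.locks.LockSupport;

  // Rough pacing sketch: enforce a gap of (1 second / permitsPerSecond) between permits.
  class PacingSketch {
    private final long minGapNanos;
    private long lastPermitNanos = Long.MIN_VALUE;

    PacingSketch(int permitsPerSecond) {
      this.minGapNanos = TimeUnit.SECONDS.toNanos(1) / permitsPerSecond;
    }

    // Returns false when honoring the gap would exceed the caller's timeout.
    synchronized boolean acquire(long timeout, TimeUnit unit) {
      long now = System.nanoTime();
      // First call never waits; later calls sleep out the remainder of the gap.
      long earliest = (lastPermitNanos == Long.MIN_VALUE) ? now : lastPermitNanos + minGapNanos;
      long waitNanos = earliest - now;
      if (waitNanos > unit.toNanos(timeout)) {
        return false;
      }
      if (waitNanos > 0) {
        LockSupport.parkNanos(waitNanos); // sketch ignores spurious wakeups
      }
      lastPermitNanos = Math.max(now, earliest);
      return true;
    }
  }

Holding the monitor while parking serializes concurrent callers, which matches the burst tests' expectation that n calls take roughly (n - 1) gaps end to end.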
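Finally, referring back to the AV0 Javadoc at the top of this section: the aggregated header is a version prefix followed by one [clientCorrelationId:fileSystemId:aggregated-metrics] entry per filesystem, with entries separated by ':'. A hypothetical assembler for that documented shape; the real serialization lives in TracingContext/TracingHeaderVersion and may differ in detail:

  import java.util.List;

  // Hypothetical sketch of the documented AV0 shape:
  // "av0" followed by one [cid:fsid:metrics] entry per filesystem, ':'-separated.
  final class Av0HeaderSketch {
    static String build(List<String[]> entries) {
      StringBuilder header = new StringBuilder("av0"); // version prefix
      for (String[] e : entries) {
        // e = {clientCorrelationId, fileSystemId, aggregatedMetrics}
        header.append(":[").append(e[0]).append(':')
              .append(e[1]).append(':')
              .append(e[2]).append(']');
      }
      return header.toString();
    }
  }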