diff --git a/hadoop-tools/hadoop-azure/src/config/checkstyle-suppressions.xml b/hadoop-tools/hadoop-azure/src/config/checkstyle-suppressions.xml
index 070c8c1fe827ae..2065746b766116 100644
--- a/hadoop-tools/hadoop-azure/src/config/checkstyle-suppressions.xml
+++ b/hadoop-tools/hadoop-azure/src/config/checkstyle-suppressions.xml
@@ -48,4 +48,11 @@
files="org[\\/]apache[\\/]hadoop[\\/]fs[\\/]azurebfs[\\/]utils[\\/]Base64.java"/>
+
+
+
+
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/AzureNativeFileSystemStore.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/AzureNativeFileSystemStore.java
index d7ba3debec209f..af2b696964451c 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/AzureNativeFileSystemStore.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/AzureNativeFileSystemStore.java
@@ -807,7 +807,7 @@ public void initialize(URI uri, Configuration conf, AzureFileSystemInstrumentati
LOG.debug("Page blob directories: {}", setToString(pageBlobDirs));
// User-agent
- userAgentId = "wasbdriverV2.1";
+ userAgentId = "wasbdriverV2.2";
// Extract the directories that should contain block blobs with compaction
blockBlobWithCompationDirs = getDirectorySet(
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java
index 771f95a8b64e63..4179da21fd5e9e 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java
@@ -21,6 +21,7 @@
import java.io.IOException;
import java.lang.reflect.Field;
+import org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys;
import org.apache.hadoop.fs.azurebfs.services.PrefixMode;
import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions;
@@ -119,6 +120,15 @@ public class AbfsConfiguration{
DefaultValue = DEFAULT_OPTIMIZE_FOOTER_READ)
private boolean optimizeFooterRead;
+ @BooleanConfigurationValidatorAnnotation(
+ ConfigurationKey = FS_AZURE_ACCOUNT_IS_EXPECT_HEADER_ENABLED,
+ DefaultValue = DEFAULT_FS_AZURE_ACCOUNT_IS_EXPECT_HEADER_ENABLED)
+ private boolean isExpectHeaderEnabled;
+
+ @BooleanConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_ACCOUNT_LEVEL_THROTTLING_ENABLED,
+ DefaultValue = DEFAULT_FS_AZURE_ACCOUNT_LEVEL_THROTTLING_ENABLED)
+ private boolean accountThrottlingEnabled;
+
@IntegerConfigurationValidatorAnnotation(ConfigurationKey = AZURE_READ_BUFFER_SIZE,
MinValue = MIN_BUFFER_SIZE,
MaxValue = MAX_BUFFER_SIZE,
@@ -246,7 +256,7 @@ public class AbfsConfiguration{
private int readAheadQueueDepth;
@IntegerConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_BLOB_DIR_RENAME_MAX_THREAD,
- DefaultValue = 0)
+ DefaultValue = DEFAULT_FS_AZURE_BLOB_RENAME_THREAD)
private int blobDirRenameMaxThread;
@LongConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_BLOB_COPY_PROGRESS_POLL_WAIT_MILLIS,
@@ -275,6 +285,14 @@ public class AbfsConfiguration{
DefaultValue = DEFAULT_ENABLE_AUTOTHROTTLING)
private boolean enableAutoThrottling;
+ @IntegerConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_ACCOUNT_OPERATION_IDLE_TIMEOUT,
+ DefaultValue = DEFAULT_ACCOUNT_OPERATION_IDLE_TIMEOUT_MS)
+ private int accountOperationIdleTimeout;
+
+ @IntegerConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_ANALYSIS_PERIOD,
+ DefaultValue = DEFAULT_ANALYSIS_PERIOD_MS)
+ private int analysisPeriod;
+
@StringConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_USER_AGENT_PREFIX_KEY,
DefaultValue = DEFAULT_FS_AZURE_USER_AGENT_PREFIX)
private String userAgentId;
@@ -326,8 +344,12 @@ public class AbfsConfiguration{
FS_AZURE_ENABLE_ABFS_LIST_ITERATOR, DefaultValue = DEFAULT_ENABLE_ABFS_LIST_ITERATOR)
private boolean enableAbfsListIterator;
- @IntegerConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_MAX_CONSUMER_LAG, DefaultValue = DEFAULT_FS_AZURE_MAX_CONSUMER_LAG)
- private int maximumConsumerLag;
+ @IntegerConfigurationValidatorAnnotation(ConfigurationKey =
+ FS_AZURE_PRODUCER_QUEUE_MAX_SIZE, DefaultValue = DEFAULT_FS_AZURE_PRODUCER_QUEUE_MAX_SIZE)
+ private int producerQueueMaxSize;
+
+ @BooleanConfigurationValidatorAnnotation(ConfigurationKey=FS_AZURE_LEASE_CREATE_NON_RECURSIVE, DefaultValue = DEFAULT_FS_AZURE_LEASE_CREATE_NON_RECURSIVE)
+ private boolean leaseOnCreateNonRecursive;
public AbfsConfiguration(final Configuration rawConfig, String accountName)
throws IllegalAccessException, InvalidConfigurationValueException, IOException {
@@ -408,6 +430,11 @@ public boolean shouldEnableBlobEndPoint() {
DefaultValue = DEFAULT_FS_AZURE_INGRESS_FALLBACK_TO_DFS)
private boolean ingressFallbackToDfs;
+ @BooleanConfigurationValidatorAnnotation(
+ ConfigurationKey = FS_AZURE_READ_FALLBACK_TO_DFS,
+ DefaultValue = DEFAULT_AZURE_READ_FALLBACK_TO_DFS)
+ private boolean readFallbackToDfs;
+
public boolean shouldMkdirFallbackToDfs() {
return mkdirFallbackToDfs;
}
@@ -416,6 +443,10 @@ public boolean shouldIngressFallbackToDfs() {
return ingressFallbackToDfs;
}
+ public boolean shouldReadFallbackToDfs() {
+ return readFallbackToDfs;
+ }
+
/**
* Gets the Azure Storage account name corresponding to this instance of configuration.
* @return the Azure Storage account name
@@ -768,6 +799,14 @@ public String getAppendBlobDirs() {
return this.azureAppendBlobDirs;
}
+ public boolean isExpectHeaderEnabled() {
+ return this.isExpectHeaderEnabled;
+ }
+
+ public boolean accountThrottlingEnabled() {
+ return accountThrottlingEnabled;
+ }
+
public String getAzureInfiniteLeaseDirs() {
return this.azureInfiniteLeaseDirs;
}
@@ -810,8 +849,16 @@ public boolean isAutoThrottlingEnabled() {
return this.enableAutoThrottling;
}
+ public int getAccountOperationIdleTimeout() {
+ return accountOperationIdleTimeout;
+ }
+
+ public int getAnalysisPeriod() {
+ return analysisPeriod;
+ }
+
public String getCustomUserAgentPrefix() {
- return "abfsdriverV2.1";
+ return "abfsdriverV2.2";
}
public String getClusterName() {
@@ -1184,8 +1231,11 @@ public void setEnableAbfsListIterator(boolean enableAbfsListIterator) {
this.enableAbfsListIterator = enableAbfsListIterator;
}
- public int getMaximumConsumerLag() {
- return maximumConsumerLag;
+ public int getProducerQueueMaxSize() {
+ return producerQueueMaxSize;
}
+ public boolean isLeaseOnCreateNonRecursive() {
+ return leaseOnCreateNonRecursive;
+ }
}
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java
index 0e22f5e4334e90..d79acfe3202a3c 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java
@@ -26,6 +26,7 @@
import java.net.HttpURLConnection;
import java.net.URI;
import java.net.URISyntaxException;
+import java.nio.charset.StandardCharsets;
import java.nio.file.AccessDeniedException;
import java.util.Hashtable;
import java.util.List;
@@ -58,7 +59,6 @@
import org.apache.commons.lang3.ArrayUtils;
import org.apache.hadoop.fs.azurebfs.services.AbfsClient;
-import org.apache.hadoop.fs.azurebfs.services.AbfsClientThrottlingIntercept;
import org.apache.hadoop.fs.azurebfs.services.AbfsListStatusRemoteIterator;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.classification.InterfaceStability;
@@ -114,6 +114,7 @@
import static org.apache.hadoop.fs.CommonConfigurationKeys.IOSTATISTICS_LOGGING_LEVEL;
import static org.apache.hadoop.fs.CommonConfigurationKeys.IOSTATISTICS_LOGGING_LEVEL_DEFAULT;
import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.*;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.BLOB_LEASE_ONE_MINUTE_DURATION;
import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ENABLE_BLOB_ENDPOINT;
import static org.apache.hadoop.fs.azurebfs.constants.FileSystemUriSchemes.ABFS_DNS_PREFIX;
import static org.apache.hadoop.fs.azurebfs.constants.FileSystemUriSchemes.WASB_DNS_PREFIX;
@@ -126,6 +127,8 @@
import static org.apache.hadoop.fs.azurebfs.services.AbfsErrors.PATH_EXISTS;
import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.RENAME_DESTINATION_PARENT_PATH_NOT_FOUND;
import static org.apache.hadoop.fs.azurebfs.constants.InternalConstants.CAPABILITY_SAFE_READAHEAD;
+import static org.apache.hadoop.fs.azurebfs.utils.UriUtils.decodeMetadataAttribute;
+import static org.apache.hadoop.fs.azurebfs.utils.UriUtils.encodeMetadataAttribute;
import static org.apache.hadoop.fs.impl.PathCapabilitiesSupport.validatePathCapabilityArgs;
import static org.apache.hadoop.fs.statistics.IOStatisticsLogging.logIOStatisticsAtLevel;
import static org.apache.hadoop.util.functional.RemoteIterators.filteringRemoteIterator;
@@ -262,7 +265,6 @@ public void initialize(URI uri, Configuration configuration)
}
}
- AbfsClientThrottlingIntercept.initializeSingleton(abfsConfiguration.isAutoThrottlingEnabled());
boolean isRedirect = abfsConfiguration.isRedirection();
if (isRedirect) {
String abfsUrl = uri.toString();
@@ -365,7 +367,7 @@ private FSDataInputStream open(final Path path,
TracingContext tracingContext = new TracingContext(clientCorrelationId,
fileSystemId, FSOperationType.OPEN, tracingHeaderFormat,
listener);
- InputStream inputStream = abfsStore.openFileForRead(qualifiedPath,
+ InputStream inputStream = getAbfsStore().openFileForRead(qualifiedPath,
options, statistics, tracingContext);
return new FSDataInputStream(inputStream);
} catch(AzureBlobFileSystemException ex) {
@@ -510,8 +512,10 @@ public FSDataOutputStream createNonRecursive(final Path f, final FsPermission pe
String parentPath = parent.toUri().getPath();
if (getAbfsStore().getAbfsConfiguration().getPrefixMode() == PrefixMode.BLOB
&& getAbfsStore().isAtomicRenameKey(parentPath)) {
- abfsBlobLease = new AbfsBlobLease(getAbfsClient(),
- parentPath, tracingContext);
+ if(getAbfsStore().getAbfsConfiguration().isLeaseOnCreateNonRecursive()) {
+ abfsBlobLease = new AbfsBlobLease(getAbfsClient(),
+ parentPath, BLOB_LEASE_ONE_MINUTE_DURATION, tracingContext);
+ }
}
final FileStatus parentFileStatus = tryGetFileStatus(parent, tracingContext);
@@ -1046,10 +1050,21 @@ private FileStatus getFileStatus(final Path path,
LOG.debug("AzureBlobFileSystem.getFileStatus path: {}", path);
statIncrement(CALL_GET_FILE_STATUS);
Path qualifiedPath = makeQualified(path);
+ FileStatus fileStatus;
try {
- FileStatus fileStatus = abfsStore.getFileStatus(qualifiedPath,
- tracingContext);
+ if (abfsStore.getPrefixMode() == PrefixMode.BLOB) {
+ /*
+ * Getting the file status over the Blob endpoint makes an additional
+ * call to check whether the directory is implicit.
+ */
+ fileStatus = abfsStore.getFileStatusOverBlob(qualifiedPath,
+ tracingContext);
+ }
+ else {
+ fileStatus = abfsStore.getFileStatus(qualifiedPath,
+ tracingContext);
+ }
if (getAbfsStore().getAbfsConfiguration().getPrefixMode()
== PrefixMode.BLOB && fileStatus != null && fileStatus.isDirectory()
&&
@@ -1314,13 +1329,30 @@ public void setXAttr(final Path path, final String name, final byte[] value, fin
TracingContext tracingContext = new TracingContext(clientCorrelationId,
fileSystemId, FSOperationType.SET_ATTR, true, tracingHeaderFormat,
listener);
- Hashtable properties = abfsStore
- .getPathStatus(qualifiedPath, tracingContext);
+ Hashtable properties;
String xAttrName = ensureValidAttributeName(name);
+ String xAttrValue;
+
+ if (abfsStore.getPrefixMode() == PrefixMode.BLOB) {
+ properties = abfsStore.getBlobMetadata(qualifiedPath, tracingContext);
+
+ boolean xAttrExists = properties.containsKey(xAttrName);
+ XAttrSetFlag.validate(name, xAttrExists, flag);
+
+ // On the Blob endpoint, metadata is passed as HTTP request headers.
+ // The UTF-8 bytes are decoded into a String, which is then URL encoded.
+ xAttrValue = encodeMetadataAttribute(new String(value, StandardCharsets.UTF_8));
+ properties.put(xAttrName, xAttrValue);
+ abfsStore.setBlobMetadata(qualifiedPath, properties, tracingContext);
+
+ return;
+ }
+
+ properties = abfsStore.getPathStatus(qualifiedPath, tracingContext);
boolean xAttrExists = properties.containsKey(xAttrName);
XAttrSetFlag.validate(name, xAttrExists, flag);
- String xAttrValue = abfsStore.decodeAttribute(value);
+ xAttrValue = abfsStore.decodeAttribute(value);
properties.put(xAttrName, xAttrValue);
abfsStore.setPathProperties(qualifiedPath, properties, tracingContext);
} catch (AzureBlobFileSystemException ex) {
@@ -1354,9 +1386,21 @@ public byte[] getXAttr(final Path path, final String name)
TracingContext tracingContext = new TracingContext(clientCorrelationId,
fileSystemId, FSOperationType.GET_ATTR, true, tracingHeaderFormat,
listener);
- Hashtable properties = abfsStore
- .getPathStatus(qualifiedPath, tracingContext);
+ Hashtable properties;
String xAttrName = ensureValidAttributeName(name);
+
+ if (abfsStore.getPrefixMode() == PrefixMode.BLOB) {
+ properties = abfsStore.getBlobMetadata(qualifiedPath, tracingContext);
+ if (properties.containsKey(xAttrName)) {
+ String xAttrValue = properties.get(xAttrName);
+ value = decodeMetadataAttribute(xAttrValue).getBytes(
+ StandardCharsets.UTF_8);
+ }
+ return value;
+ }
+
+ properties = abfsStore.getPathStatus(qualifiedPath, tracingContext);
+
if (properties.containsKey(xAttrName)) {
String xAttrValue = properties.get(xAttrName);
value = abfsStore.encodeAttribute(xAttrValue);
@@ -2022,4 +2066,5 @@ public boolean hasPathCapability(final Path path, final String capability)
public IOStatistics getIOStatistics() {
return abfsCounters != null ? abfsCounters.getIOStatistics() : null;
}
+
}
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java
index 7816391c8dccd4..da3d2d6933e42f 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java
@@ -61,10 +61,12 @@
import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidConfigurationValueException;
import org.apache.hadoop.fs.azurebfs.enums.BlobCopyProgress;
import org.apache.hadoop.fs.azurebfs.services.AbfsBlobLease;
+import org.apache.hadoop.fs.azurebfs.services.AbfsDfsLease;
import org.apache.hadoop.fs.azurebfs.services.ListBlobConsumer;
import org.apache.hadoop.fs.azurebfs.services.ListBlobProducer;
import org.apache.hadoop.fs.azurebfs.services.ListBlobQueue;
import org.apache.hadoop.fs.azurebfs.services.OperativeEndpoint;
+import org.apache.hadoop.fs.azurebfs.services.AbfsHttpHeader;
import org.apache.hadoop.fs.azurebfs.services.PrefixMode;
import org.apache.hadoop.fs.azurebfs.services.BlobList;
import org.apache.hadoop.fs.azurebfs.services.BlobProperty;
@@ -146,9 +148,12 @@
import org.apache.hadoop.util.concurrent.HadoopExecutors;
import org.apache.http.client.utils.URIBuilder;
-import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.FORWARD_SLASH;
-import static org.apache.hadoop.fs.azurebfs.services.RenameAtomicityUtils.SUFFIX;
import static java.net.HttpURLConnection.HTTP_CONFLICT;
+import static java.net.HttpURLConnection.HTTP_NOT_FOUND;
+
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.BLOB_LEASE_ONE_MINUTE_DURATION;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.FORWARD_SLASH;
+import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_METADATA_PREFIX;
import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.CHAR_EQUALS;
import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.CHAR_FORWARD_SLASH;
import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.CHAR_HYPHEN;
@@ -176,6 +181,7 @@
import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.COPY_BLOB_ABORTED;
import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.COPY_BLOB_FAILED;
import static org.apache.hadoop.fs.azurebfs.services.AbfsErrors.PATH_EXISTS;
+import static org.apache.hadoop.fs.azurebfs.services.RenameAtomicityUtils.SUFFIX;
/**
* Provides the bridging logic between Hadoop's abstract filesystem and Azure Storage.
@@ -206,8 +212,6 @@ public class AzureBlobFileSystemStore implements Closeable, ListingSupport {
private final AbfsCounters abfsCounters;
private PrefixMode prefixMode;
- private final ExecutorService renameBlobExecutorService;
-
/**
* The set of directories where we should store files as append blobs.
*/
@@ -298,14 +302,6 @@ public AzureBlobFileSystemStore(
abfsConfiguration.getMaxWriteRequestsToQueue(),
10L, TimeUnit.SECONDS,
"abfs-bounded");
- if (abfsConfiguration.getBlobDirRenameMaxThread() == 0) {
- renameBlobExecutorService = Executors.newFixedThreadPool(
- Runtime.getRuntime()
- .availableProcessors());
- } else {
- renameBlobExecutorService = Executors.newFixedThreadPool(
- abfsConfiguration.getBlobDirRenameMaxThread());
- }
}
/**
@@ -686,18 +682,130 @@ BlobProperty getBlobProperty(Path blobPath,
* @throws AzureBlobFileSystemException exception thrown from
* {@link AbfsClient#getBlobProperty(Path, TracingContext)} call
*/
- BlobProperty getContainerProperty(TracingContext tracingContext) throws AzureBlobFileSystemException {
- AbfsRestOperation op = client.getContainerProperty(tracingContext);
- BlobProperty blobProperty = new BlobProperty();
+ BlobProperty getContainerProperty(TracingContext tracingContext)
+ throws AzureBlobFileSystemException {
+ try (AbfsPerfInfo perfInfo = startTracking("getContainerProperty", "getContainerProperty")) {
+ LOG.debug("getContainerProperty for filesystem: {}",
+ client.getFileSystem());
- final AbfsHttpOperation opResult = op.getResult();
+ AbfsRestOperation op = client.getContainerProperty(tracingContext);
+ perfInfo.registerResult(op.getResult()).registerSuccess(true);
- blobProperty.setIsDirectory(true);
- blobProperty.setPath(new Path("/"));
+ BlobProperty blobProperty = new BlobProperty();
+ blobProperty.setIsDirectory(true);
+ blobProperty.setPath(new Path(FORWARD_SLASH));
- return blobProperty;
+ return blobProperty;
+ }
+ }
+
+ /**
+ * Gets the user-defined properties (metadata) of a blob over the blob endpoint.
+ * @param path path of the blob whose metadata is requested
+ * @param tracingContext tracing context passed to the client and listing calls
+ * @return metadata key-value pairs; empty for an implicit directory
+ * @throws AzureBlobFileSystemException if the request fails and no implicit directory is found
+ */
+ public Hashtable getBlobMetadata(final Path path,
+ TracingContext tracingContext) throws AzureBlobFileSystemException {
+ try (AbfsPerfInfo perfInfo = startTracking("getBlobMetadata", "getBlobMetadata")) {
+ LOG.debug("getBlobMetadata for filesystem: {} path: {}",
+ client.getFileSystem(),
+ path);
+
+ final AbfsRestOperation op = client.getBlobMetadata(path, tracingContext);
+ perfInfo.registerResult(op.getResult()).registerSuccess(true);
+
+ final Hashtable metadata = parseResponseHeadersToHashTable(op.getResult());
+ return metadata;
+ }
+ catch (AbfsRestOperationException ex) {
+ // The path does not exist as an explicit blob.
+ // For a 404 on a non-root path, list under it to detect an implicit directory.
+ if (ex.getStatusCode() == HttpURLConnection.HTTP_NOT_FOUND && !path.isRoot()) {
+ List blobProperties = getListBlobs(path, null,
+ tracingContext, 2, true);
+ if (blobProperties.size() == 0) {
+ throw ex;
+ }
+ else {
+ // The listing is non-empty, so the path exists as an implicit directory.
+ // Return an empty Hashtable, as there is no metadata to report.
+ return new Hashtable<>();
+ }
+ }
+ else {
+ throw ex;
+ }
+ }
+ }
+
+ /**
+ * Sets the user-defined properties (metadata) of a blob over the blob endpoint.
+ * @param path path of the blob on which the metadata is to be set
+ * @param metadata user-defined properties; each entry is sent as a request header
+ * @param tracingContext tracing context passed to the client call
+ * @throws AzureBlobFileSystemException propagated from the set-metadata request
+ */
+ public void setBlobMetadata(final Path path,
+ final Hashtable metadata, TracingContext tracingContext)
+ throws AzureBlobFileSystemException {
+ try (AbfsPerfInfo perfInfo = startTracking("setBlobMetadata", "setBlobMetadata")) {
+ LOG.debug("setBlobMetadata for filesystem: {} path: {} with properties: {}",
+ client.getFileSystem(),
+ path,
+ metadata);
+
+ final List metadataRequestHeaders = getRequestHeadersForMetadata(metadata);
+ final AbfsRestOperation op = client.setBlobMetadata(
+ path, metadataRequestHeaders, tracingContext);
+
+ perfInfo.registerResult(op.getResult()).registerSuccess(true);
+ }
+ }
+
+ /**
+ * User-defined properties over the blob endpoint are returned as response headers
+ * with the prefix "x-ms-meta-". Each property is a separate response header.
+ * This parses all headers, strips the prefix and keeps each header's first value.
+ * @param result AbfsHttpOperation result containing the response headers.
+ * @return Hashtable of user-defined metadata keyed by the un-prefixed header name.
+ */
+ private Hashtable parseResponseHeadersToHashTable(
+ AbfsHttpOperation result) {
+ final Hashtable metadata = new Hashtable<>();
+ String name, value;
+
+ final Map> responseHeaders = result.getResponseHeaders();
+ for (Map.Entry> entry : responseHeaders.entrySet()) {
+ name = entry.getKey();
+
+ if (name != null && name.startsWith(X_MS_METADATA_PREFIX)) {
+ value = entry.getValue().get(0);
+ metadata.put(name.substring(X_MS_METADATA_PREFIX.length()), value);
+ }
+ }
+ return metadata;
+ }
+
+ /**
+ * User-defined properties over the blob endpoint must be sent as request
+ * headers with the prefix "x-ms-meta-". Each property becomes a separate
+ * request header. This walks all the properties, adds the prefix to each key
+ * and creates the request headers.
+ * @param metadata user-defined properties to convert, keyed by un-prefixed name
+ * @return List of request headers to be passed with the API call.
+ */
+ private List getRequestHeadersForMetadata(Hashtable metadata) {
+ final List headers = new ArrayList();
+
+ for(Map.Entry entry : metadata.entrySet()) {
+ headers.add(new AbfsHttpHeader(X_MS_METADATA_PREFIX + entry.getKey(), entry.getValue()));
+ }
+ return headers;
+ }
+
/**
* Get the list of a blob on a give path, or blob starting with the given prefix.
*
@@ -926,7 +1034,11 @@ private AbfsRestOperation conditionalCreateOverwriteFile(final String relativePa
if (e.getStatusCode() == HTTP_CONFLICT) {
// File pre-exists, fetch eTag
try {
- op = client.getPathStatus(relativePath, false, tracingContext);
+ if (getPrefixMode() == PrefixMode.BLOB) {
+ op = client.getBlobProperty(new Path(relativePath), tracingContext);
+ } else {
+ op = client.getPathStatus(relativePath, false, tracingContext);
+ }
} catch (AbfsRestOperationException ex) {
if (ex.getStatusCode() == HttpURLConnection.HTTP_NOT_FOUND) {
// Is a parallel access case, as file which was found to be
@@ -997,6 +1109,7 @@ private AbfsOutputStreamContext populateAbfsOutputStreamContext(
}
return new AbfsOutputStreamContext(abfsConfiguration.getSasTokenRenewPeriodForStreamsInSeconds())
.withWriteBufferSize(bufferSize)
+ .enableExpectHeader(abfsConfiguration.isExpectHeaderEnabled())
.enableFlush(abfsConfiguration.isFlushEnabled())
.enableSmallWriteOptimization(abfsConfiguration.isSmallWriteOptimizationEnabled())
.disableOutputStreamFlush(abfsConfiguration.isOutputStreamFlushDisabled())
@@ -1125,15 +1238,45 @@ public AbfsInputStream openFileForRead(final Path path,
String relativePath = getRelativePath(path);
- final AbfsRestOperation op = client
- .getPathStatus(relativePath, false, tracingContext);
+ AbfsRestOperation op;
+ if (getPrefixMode() == PrefixMode.BLOB) {
+ try {
+ op = client.getBlobProperty(new Path(relativePath), tracingContext);
+ } catch (AbfsRestOperationException e) {
+ if (e.getStatusCode() != HTTP_NOT_FOUND) {
+ throw e;
+ }
+ List blobsList = getListBlobs(new Path(relativePath), null,
+ tracingContext, 2, true);
+ if (blobsList.size() > 0) {
+ throw new AbfsRestOperationException(
+ AzureServiceErrorCode.PATH_NOT_FOUND.getStatusCode(),
+ AzureServiceErrorCode.PATH_NOT_FOUND.getErrorCode(),
+ "openFileForRead must be used with files and not directories",
+ null);
+ } else {
+ throw e;
+ }
+ }
+ } else {
+ op = client
+ .getPathStatus(relativePath, false, tracingContext);
+ }
+
perfInfo.registerResult(op.getResult());
- final String resourceType = op.getResult().getResponseHeader(HttpHeaderConfigurations.X_MS_RESOURCE_TYPE);
+ boolean isDirectory;
+ if (getPrefixMode() == PrefixMode.BLOB) {
+ isDirectory = Boolean.parseBoolean(op.getResult().getResponseHeader(X_MS_META_HDI_ISFOLDER));
+ } else {
+ final String resourceType = op.getResult().getResponseHeader(HttpHeaderConfigurations.X_MS_RESOURCE_TYPE);
+ isDirectory = parseIsDirectory(resourceType);
+ }
+
final long contentLength = Long.parseLong(op.getResult().getResponseHeader(HttpHeaderConfigurations.CONTENT_LENGTH));
final String eTag = op.getResult().getResponseHeader(HttpHeaderConfigurations.ETAG);
- if (parseIsDirectory(resourceType)) {
+ if (isDirectory) {
throw new AbfsRestOperationException(
AzureServiceErrorCode.PATH_NOT_FOUND.getStatusCode(),
AzureServiceErrorCode.PATH_NOT_FOUND.getErrorCode(),
@@ -1183,14 +1326,44 @@ public OutputStream openFileForWrite(final Path path,
String relativePath = getRelativePath(path);
- final AbfsRestOperation op = client
- .getPathStatus(relativePath, false, tracingContext);
+ final AbfsRestOperation op;
+ try {
+ if (getPrefixMode() == PrefixMode.BLOB) {
+ op = client.getBlobProperty(path, tracingContext);
+ } else {
+ op = client.getPathStatus(relativePath, false, tracingContext);
+ }
+ } catch (AbfsRestOperationException ex) {
+ // The path does not exist explicitly.
+ // Check here if the path is an implicit dir
+ if (getPrefixMode() == PrefixMode.BLOB && ex.getStatusCode() == HttpURLConnection.HTTP_NOT_FOUND) {
+ List blobProperties = getListBlobs(path, null,
+ tracingContext, 2, true);
+ if (blobProperties.size() != 0) {
+ throw new AbfsRestOperationException(
+ AzureServiceErrorCode.PATH_NOT_FOUND.getStatusCode(),
+ AzureServiceErrorCode.PATH_NOT_FOUND.getErrorCode(),
+ "openFileForWrite must be used with files and not directories",
+ null);
+ } else {
+ throw ex;
+ }
+ } else {
+ throw ex;
+ }
+ }
perfInfo.registerResult(op.getResult());
final String resourceType = op.getResult().getResponseHeader(HttpHeaderConfigurations.X_MS_RESOURCE_TYPE);
final Long contentLength = Long.valueOf(op.getResult().getResponseHeader(HttpHeaderConfigurations.CONTENT_LENGTH));
- if (parseIsDirectory(resourceType)) {
+ boolean isDirectory;
+ if (getPrefixMode() == PrefixMode.BLOB) {
+ isDirectory = op.getResult().getResponseHeader(X_MS_META_HDI_ISFOLDER) != null;
+ } else {
+ isDirectory = parseIsDirectory(resourceType);
+ }
+ if (isDirectory) {
throw new AbfsRestOperationException(
AzureServiceErrorCode.PATH_NOT_FOUND.getStatusCode(),
AzureServiceErrorCode.PATH_NOT_FOUND.getErrorCode(),
@@ -1275,7 +1448,10 @@ public void rename(final Path source, final Path destination,
String nextMarker = blobList.getNextMarker();
List srcBlobProperties = blobList.getBlobPropertyList();
- ListBlobQueue listBlobQueue = new ListBlobQueue(blobList);
+ ListBlobQueue listBlobQueue = new ListBlobQueue(
+ blobList.getBlobPropertyList(),
+ getAbfsConfiguration().getProducerQueueMaxSize(),
+ getAbfsConfiguration().getBlobDirRenameMaxThread());
if (nextMarker != null) {
new ListBlobProducer(listSrc,
client, listBlobQueue, nextMarker, tracingContext);
@@ -1365,7 +1541,7 @@ public void rename(final Path source, final Path destination,
if (isAtomicRenameKey(source.toUri().getPath())) {
LOG.debug("source dir {} is an atomicRenameKey",
source.toUri().getPath());
- srcDirLease = new AbfsBlobLease(client, source.toUri().getPath(), tracingContext);
+ srcDirLease = new AbfsBlobLease(client, source.toUri().getPath(), BLOB_LEASE_ONE_MINUTE_DURATION, tracingContext);
renameAtomicityUtils.preRename(srcBlobProperties, isCreateOperationOnBlobEndpoint());
isAtomicRename = true;
} else {
@@ -1385,7 +1561,8 @@ public void rename(final Path source, final Path destination,
LOG.debug("source {} is not directory", source);
String leaseId = null;
if (isAtomicRenameKey(source.toUri().getPath())) {
- leaseId = new AbfsBlobLease(client, source.toUri().getPath(), tracingContext).getLeaseId();
+ leaseId = new AbfsBlobLease(client, source.toUri().getPath(),
+ BLOB_LEASE_ONE_MINUTE_DURATION, tracingContext).getLeaseID();
}
renameBlob(blobPropOnSrc.getPath(), destination, leaseId, tracingContext
);
@@ -1434,15 +1611,18 @@ private void renameBlobDir(final Path source,
final ListBlobQueue listBlobQueue,
final BlobProperty blobPropOnSrc, final AbfsBlobLease srcDirBlobLease,
final Boolean isAtomicRename) throws AzureBlobFileSystemException {
- BlobList blobList;
+ List blobList;
ListBlobConsumer listBlobConsumer = new ListBlobConsumer(listBlobQueue);
+ final ExecutorService renameBlobExecutorService
+ = Executors.newFixedThreadPool(
+ getAbfsConfiguration().getBlobDirRenameMaxThread());
while(!listBlobConsumer.isCompleted()) {
blobList = listBlobConsumer.consume();
if(blobList == null) {
continue;
}
List futures = new ArrayList<>();
- for (BlobProperty blobProperty : blobList.getBlobPropertyList()) {
+ for (BlobProperty blobProperty : blobList) {
futures.add(renameBlobExecutorService.submit(() -> {
try {
AbfsBlobLease blobLease = null;
@@ -1459,16 +1639,13 @@ private void renameBlobDir(final Path source,
* on a log file, to gain exclusive access to it, before it splits it.
*/
blobLease = new AbfsBlobLease(client,
- blobProperty.getPath().toUri().getPath(), tracingContext);
- }
- if(srcDirBlobLease != null) {
- srcDirBlobLease.renewIfRequired();
+ blobProperty.getPath().toUri().getPath(), BLOB_LEASE_ONE_MINUTE_DURATION, tracingContext);
}
renameBlob(
blobProperty.getPath(),
createDestinationPathForBlobPartOfRenameSrcDir(destination,
blobProperty, source),
- blobLease != null ? blobLease.getLeaseId() : null,
+ blobLease != null ? blobLease.getLeaseID() : null,
tracingContext);
} catch (AzureBlobFileSystemException e) {
LOG.error(String.format("rename from %s to %s for blob %s failed",
@@ -1487,11 +1664,12 @@ private void renameBlobDir(final Path source,
}
}
}
+ renameBlobExecutorService.shutdown();
renameBlob(
blobPropOnSrc.getPath(), createDestinationPathForBlobPartOfRenameSrcDir(destination,
blobPropOnSrc, source),
- srcDirBlobLease != null ? srcDirBlobLease.getLeaseId() : null,
+ srcDirBlobLease != null ? srcDirBlobLease.getLeaseID() : null,
tracingContext);
}
@@ -1651,6 +1829,98 @@ public FileStatus getFileStatus(final Path path,
}
}
+ public FileStatus getFileStatusOverBlob(final Path path,
+ TracingContext tracingContext) throws IOException {
+ try (AbfsPerfInfo perfInfo = startTracking("getFileStatus", "undetermined")) {
+ LOG.debug("getFileStatus filesystem call over blob endpoint: {} path: {}",
+ client.getFileSystem(),
+ path);
+
+ final AbfsRestOperation op;
+
+ // Root is resolved through container properties; any other path is looked up as an explicit blob.
+ if (path.isRoot()) {
+ perfInfo.registerCallee("getContainerProperties");
+ op = client.getContainerProperty(tracingContext);
+ } else {
+ perfInfo.registerCallee("getBlobProperty");
+ op = client.getBlobProperty(path, tracingContext);
+ }
+
+ perfInfo.registerResult(op.getResult());
+ final long blockSize = abfsConfiguration.getAzureBlockSize();
+ final AbfsHttpOperation result = op.getResult();
+
+ String eTag = extractEtagHeader(result);
+ final String lastModified = result.getResponseHeader(HttpHeaderConfigurations.LAST_MODIFIED);
+ final long contentLength;
+ final boolean resourceIsDir;
+
+ if (path.isRoot()) {
+ contentLength = 0;
+ resourceIsDir = true;
+ } else {
+ contentLength = parseContentLength(result.getResponseHeader(
+ HttpHeaderConfigurations.CONTENT_LENGTH));
+ resourceIsDir = result.getResponseHeader(
+ X_MS_META_HDI_ISFOLDER) != null;
+ }
+
+ final String transformedOwner = identityTransformer.transformIdentityForGetRequest(
+ result.getResponseHeader(HttpHeaderConfigurations.X_MS_OWNER),
+ true,
+ userName);
+
+ final String transformedGroup = identityTransformer.transformIdentityForGetRequest(
+ result.getResponseHeader(HttpHeaderConfigurations.X_MS_GROUP),
+ false,
+ primaryUserGroup);
+
+ perfInfo.registerSuccess(true);
+
+ return new VersionedFileStatus(
+ transformedOwner,
+ transformedGroup,
+ new AbfsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL),
+ false,
+ contentLength,
+ resourceIsDir,
+ 1,
+ blockSize,
+ DateTimeUtils.parseLastModifiedTime(lastModified),
+ path,
+ eTag);
+ }
+ catch (AbfsRestOperationException ex) {
+ // A 404 on a non-root path only means no explicit blob exists at the path.
+ // Ask getListBlobs for blobs at the path: none -> rethrow, any -> report an implicit directory.
+ if (ex.getStatusCode() == HttpURLConnection.HTTP_NOT_FOUND && !path.isRoot()) {
+ List blobProperties = getListBlobs(path,null, tracingContext, 2, true);
+ if (blobProperties.size() == 0) {
+ throw ex;
+ }
+ else {
+ // TODO: return properties of first child blob here like in wasb after listFileStatus is implemented over blob
+ return new VersionedFileStatus(
+ userName,
+ primaryUserGroup,
+ new AbfsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL),
+ false,
+ 0L,
+ true,
+ 1,
+ abfsConfiguration.getAzureBlockSize(),
+ DateTimeUtils.parseLastModifiedTime(null),
+ path,
+ null);
+ }
+ }
+ else {
+ throw ex;
+ }
+ }
+ }
+
/**
* @param path The list path.
* @param tracingContext Tracks identifiers for request header
@@ -2170,7 +2440,9 @@ RenameAtomicityUtils.RedoRenameInvocation getRedoRenameInvocation(final TracingC
public void redo(final Path destination, final Path src)
throws AzureBlobFileSystemException {
- ListBlobQueue listBlobQueue = new ListBlobQueue();
+ ListBlobQueue listBlobQueue = new ListBlobQueue(
+ getAbfsConfiguration().getProducerQueueMaxSize(),
+ getAbfsConfiguration().getBlobDirRenameMaxThread());
StringBuilder listSrcBuilder = new StringBuilder(src.toUri().getPath());
if (!src.isRoot()) {
listSrcBuilder.append(FORWARD_SLASH);
@@ -2180,7 +2452,7 @@ public void redo(final Path destination, final Path src)
tracingContext);
BlobProperty srcBlobProperty = getBlobProperty(src, tracingContext);
AbfsBlobLease abfsBlobLease = new AbfsBlobLease(client,
- src.toUri().getPath(), tracingContext);
+ src.toUri().getPath(), BLOB_LEASE_ONE_MINUTE_DURATION, tracingContext);
renameBlobDir(src, destination, tracingContext, listBlobQueue,
srcBlobProperty, abfsBlobLease, true);
}
@@ -2618,7 +2890,12 @@ private AbfsLease maybeCreateLease(String relativePath, TracingContext tracingCo
if (!enableInfiniteLease) {
return null;
}
- AbfsLease lease = new AbfsLease(client, relativePath, tracingContext);
+ final AbfsLease lease;
+ if (getPrefixMode() == PrefixMode.DFS) {
+ lease = new AbfsDfsLease(client, relativePath, null, tracingContext);
+ } else {
+ lease = new AbfsBlobLease(client, relativePath, null, tracingContext);
+ }
leaseRefs.put(lease, null);
return lease;
}
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java
index 32fa802200ac8a..b294ad218cec35 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java
@@ -74,6 +74,11 @@ public final class AbfsHttpConstants {
public static final String HTTP_METHOD_PATCH = "PATCH";
public static final String HTTP_METHOD_POST = "POST";
public static final String HTTP_METHOD_PUT = "PUT";
+ /**
+ * All status codes less than http 100 signify error
+ * and should qualify for retry.
+ */
+ public static final int HTTP_CONTINUE = 100;
// Abfs generic constants
public static final String SINGLE_WHITE_SPACE = " ";
@@ -120,6 +125,9 @@ public final class AbfsHttpConstants {
public static final String DEFAULT_SCOPE = "default:";
public static final String PERMISSION_FORMAT = "%04d";
public static final String SUPER_USER = "$superuser";
+ // The HTTP 100 Continue informational status response code indicates that everything so far
+ // is OK and that the client should continue with the request or ignore it if it is already finished.
+ public static final String HUNDRED_CONTINUE = "100-continue";
public static final char CHAR_FORWARD_SLASH = '/';
public static final char CHAR_EXCLAMATION_POINT = '!';
@@ -143,6 +151,7 @@ public final class AbfsHttpConstants {
public static final String COPY_STATUS_ABORTED = "aborted";
public static final String COPY_STATUS_FAILED = "failed";
public static final String HDI_ISFOLDER = "hdi_isfolder";
+ public static final Integer BLOB_LEASE_ONE_MINUTE_DURATION = 60;
private AbfsHttpConstants() {}
}
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java
index f48dee5c27310c..23989b9f3444fb 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java
@@ -35,9 +35,15 @@ public final class ConfigurationKeys {
* path to determine HNS status.
*/
public static final String FS_AZURE_ACCOUNT_IS_HNS_ENABLED = "fs.azure.account.hns.enabled";
+ /**
+ * Enable or disable expect hundred continue header.
+ * Value: {@value}.
+ */
+ public static final String FS_AZURE_ACCOUNT_IS_EXPECT_HEADER_ENABLED = "fs.azure.account.expect.header.enabled";
public static final String FS_AZURE_ACCOUNT_KEY_PROPERTY_NAME = "fs.azure.account.key";
public static final String FS_AZURE_ACCOUNT_KEY_PROPERTY_NAME_REGX = "fs\\.azure\\.account\\.key\\.(.*)";
public static final String FS_AZURE_SECURE_MODE = "fs.azure.secure.mode";
+ public static final String FS_AZURE_ACCOUNT_LEVEL_THROTTLING_ENABLED = "fs.azure.account.throttling.enabled";
// Retry strategy defined by the user
public static final String AZURE_MIN_BACKOFF_INTERVAL = "fs.azure.io.retry.min.backoff.interval";
@@ -116,6 +122,8 @@ public final class ConfigurationKeys {
public static final String AZURE_CREATE_REMOTE_FILESYSTEM_DURING_INITIALIZATION = "fs.azure.createRemoteFileSystemDuringInitialization";
public static final String AZURE_SKIP_USER_GROUP_METADATA_DURING_INITIALIZATION = "fs.azure.skipUserGroupMetadataDuringInitialization";
public static final String FS_AZURE_ENABLE_AUTOTHROTTLING = "fs.azure.enable.autothrottling";
+ public static final String FS_AZURE_ACCOUNT_OPERATION_IDLE_TIMEOUT = "fs.azure.account.operation.idle.timeout";
+ public static final String FS_AZURE_ANALYSIS_PERIOD = "fs.azure.analysis.period";
public static final String FS_AZURE_ALWAYS_USE_HTTPS = "fs.azure.always.use.https";
public static final String FS_AZURE_ATOMIC_RENAME_KEY = "fs.azure.atomic.rename.key";
/** This config ensures that during create overwrite an existing file will be
@@ -262,10 +270,12 @@ public static String accountProperty(String property, String account) {
public static final String FS_AZURE_ENABLE_BLOB_ENDPOINT = "fs.azure.enable.blob.endpoint";
public static final String FS_AZURE_MKDIRS_FALLBACK_TO_DFS = "fs.azure.mkdirs.fallback.to.dfs";
public static final String FS_AZURE_INGRESS_FALLBACK_TO_DFS = "fs.azure.ingress.fallback.to.dfs";
+ public static final String FS_AZURE_READ_FALLBACK_TO_DFS = "fs.azure.read.fallback.to.dfs";
public static final String FS_AZURE_REDIRECT_DELETE = "fs.azure.redirect.delete";
public static final String FS_AZURE_REDIRECT_RENAME = "fs.azure.redirect.rename";
- public static final String FS_AZURE_MAX_CONSUMER_LAG = "fs.azure.max.consumer.lag";
+ public static final String FS_AZURE_PRODUCER_QUEUE_MAX_SIZE = "fs.azure.producer.queue.max.size";
+ public static final String FS_AZURE_LEASE_CREATE_NON_RECURSIVE = "fs.azure.lease.create.non.recursive";
private ConfigurationKeys() {}
}
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java
index 23aefcafb8c68a..9efb16ea746100 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java
@@ -32,7 +32,7 @@
public final class FileSystemConfigurations {
public static final String DEFAULT_FS_AZURE_ACCOUNT_IS_HNS_ENABLED = "";
-
+ public static final boolean DEFAULT_FS_AZURE_ACCOUNT_IS_EXPECT_HEADER_ENABLED = true;
public static final String USER_HOME_DIRECTORY_PREFIX = "/user";
private static final int SIXTY_SECONDS = 60 * 1000;
@@ -98,6 +98,9 @@ public final class FileSystemConfigurations {
public static final boolean DEFAULT_ENABLE_FLUSH = true;
public static final boolean DEFAULT_DISABLE_OUTPUTSTREAM_FLUSH = true;
public static final boolean DEFAULT_ENABLE_AUTOTHROTTLING = true;
+ public static final boolean DEFAULT_FS_AZURE_ACCOUNT_LEVEL_THROTTLING_ENABLED = true;
+ public static final int DEFAULT_ACCOUNT_OPERATION_IDLE_TIMEOUT_MS = 60_000;
+ public static final int DEFAULT_ANALYSIS_PERIOD_MS = 10_000;
public static final DelegatingSSLSocketFactory.SSLChannelMode DEFAULT_FS_AZURE_SSL_CHANNEL_MODE
= DelegatingSSLSocketFactory.SSLChannelMode.Default;
@@ -123,11 +126,15 @@ public final class FileSystemConfigurations {
public static final boolean DEFAULT_FS_AZURE_ENABLE_BLOBENDPOINT = false;
public static final boolean DEFAULT_FS_AZURE_MKDIRS_FALLBACK_TO_DFS = false;
public static final boolean DEFAULT_FS_AZURE_INGRESS_FALLBACK_TO_DFS = false;
+ public static final boolean DEFAULT_AZURE_READ_FALLBACK_TO_DFS = false;
// To have functionality similar to drop1 delete is going to wasb by default for now.
public static final boolean DEFAULT_FS_AZURE_REDIRECT_RENAME = false;
public static final boolean DEFAULT_FS_AZURE_REDIRECT_DELETE = true;
- public static final int DEFAULT_FS_AZURE_MAX_CONSUMER_LAG = 7000;
+ public static final int DEFAULT_FS_AZURE_PRODUCER_QUEUE_MAX_SIZE = 10000;
+ public static final boolean DEFAULT_FS_AZURE_LEASE_CREATE_NON_RECURSIVE = false;
+
+ public static final int DEFAULT_FS_AZURE_BLOB_RENAME_THREAD = 5;
/**
* Limit of queued block upload operations before writes
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/HttpHeaderConfigurations.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/HttpHeaderConfigurations.java
index d40d89f1a0d51e..cd3c321b4395d0 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/HttpHeaderConfigurations.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/HttpHeaderConfigurations.java
@@ -77,6 +77,8 @@ public final class HttpHeaderConfigurations {
public static final String X_MS_COPY_SOURCE = "x-ms-copy-source";
public static final String X_MS_COPY_STATUS_DESCRIPTION = "x-ms-copy-status-description";
public static final String X_MS_COPY_STATUS = "x-ms-copy-status";
+ public static final String EXPECT = "Expect";
+ public static final String X_MS_METADATA_PREFIX = "x-ms-meta-";
private HttpHeaderConfigurations() {}
}
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/exceptions/InvalidAbfsRestOperationException.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/exceptions/InvalidAbfsRestOperationException.java
index aba1d8c1efa2b3..147cb6d83cb0ba 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/exceptions/InvalidAbfsRestOperationException.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/exceptions/InvalidAbfsRestOperationException.java
@@ -29,12 +29,33 @@
@InterfaceAudience.Public
@InterfaceStability.Evolving
public class InvalidAbfsRestOperationException extends AbfsRestOperationException {
+
+ private static final String ERROR_MESSAGE = "InvalidAbfsRestOperationException";
+
public InvalidAbfsRestOperationException(
final Exception innerException) {
super(
AzureServiceErrorCode.UNKNOWN.getStatusCode(),
AzureServiceErrorCode.UNKNOWN.getErrorCode(),
- "InvalidAbfsRestOperationException",
+ innerException != null
+ ? innerException.toString()
+ : ERROR_MESSAGE,
innerException);
}
+
+ /**
+ * Appends the retry count to the message, whether or not an inner exception is present.
+ * @param innerException The inner exception which is originally caught; may be null.
+ * @param retryCount The retry count when the exception was thrown.
+ */
+ public InvalidAbfsRestOperationException(
+ final Exception innerException, int retryCount) {
+ super(
+ AzureServiceErrorCode.UNKNOWN.getStatusCode(),
+ AzureServiceErrorCode.UNKNOWN.getErrorCode(),
+ (innerException != null
+ ? innerException.toString()
+ : ERROR_MESSAGE) + " RetryCount: " + retryCount,
+ innerException);
+ }
}
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/AppendRequestParameters.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/AppendRequestParameters.java
index 7369bfaf56422c..57e559a60ec844 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/AppendRequestParameters.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/AppendRequestParameters.java
@@ -34,19 +34,22 @@ public enum Mode {
private final Mode mode;
private final boolean isAppendBlob;
private final String leaseId;
+ private boolean isExpectHeaderEnabled;
public AppendRequestParameters(final long position,
final int offset,
final int length,
final Mode mode,
final boolean isAppendBlob,
- final String leaseId) {
+ final String leaseId,
+ final boolean isExpectHeaderEnabled) {
this.position = position;
this.offset = offset;
this.length = length;
this.mode = mode;
this.isAppendBlob = isAppendBlob;
this.leaseId = leaseId;
+ this.isExpectHeaderEnabled = isExpectHeaderEnabled;
}
public long getPosition() {
@@ -72,4 +75,12 @@ public boolean isAppendBlob() {
public String getLeaseId() {
return this.leaseId;
}
+
+ public boolean isExpectHeaderEnabled() {
+ return isExpectHeaderEnabled;
+ }
+
+ public void setExpectHeaderEnabled(boolean expectHeaderEnabled) {
+ isExpectHeaderEnabled = expectHeaderEnabled;
+ }
}
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/extensions/SASTokenProvider.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/extensions/SASTokenProvider.java
index 22e9d46a75d445..c5f59e96905d82 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/extensions/SASTokenProvider.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/extensions/SASTokenProvider.java
@@ -44,6 +44,7 @@ public interface SASTokenProvider {
String GET_PROPERTIES_OPERATION = "get-properties";
String GET_BLOB_PROPERTIES_OPERATION = "get-blob-properties";
String GET_CONTAINER_PROPERTIES_OPERATION = "get-container-properties";
+ String GET_BLOB_METADATA_OPERATION = "get-blob-metadata";
String LIST_OPERATION = "list";
String LIST_BLOB_OPERATION = "list-blob";
String COPY_BLOB_DESTINATION = "copy-blob-dst";
@@ -55,6 +56,7 @@ public interface SASTokenProvider {
String SET_OWNER_OPERATION = "set-owner";
String SET_PERMISSION_OPERATION = "set-permission";
String SET_PROPERTIES_OPERATION = "set-properties";
+ String SET_BLOB_METADATA_OPERATION = "set-blob-metadata";
String WRITE_OPERATION = "write";
String LEASE_OPERATION = "lease";
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobLease.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobLease.java
index 40245f36535c91..e254ff98f6444c 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobLease.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobLease.java
@@ -23,56 +23,47 @@
import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_LEASE_ID;
-public class AbfsBlobLease {
- private String leaseId;
- private Long leaseRenewLastEpoch;
- private final TracingContext tracingContext;
- private final AbfsClient client;
- private final String path;
- private final Integer ONE_MINUTE = 60;
- private final Long RENEW_TIME = 30 * 1_000L;
- private Boolean freed = false;
+public class AbfsBlobLease extends AbfsLease {
- public AbfsBlobLease(AbfsClient client,
- String path,
- TracingContext tracingContext) throws
- AzureBlobFileSystemException {
- this.client = client;
- this.path = path;
- this.tracingContext = tracingContext;
- AbfsRestOperation op = client.acquireBlobLease(path, ONE_MINUTE,
- tracingContext);
- extractLeaseInfo(op);
+ public AbfsBlobLease(final AbfsClient client,
+ final String path,
+ final Integer leaseDuration,
+ final TracingContext tracingContext) throws AzureBlobFileSystemException {
+ super(client, path, leaseDuration, tracingContext);
}
- private void extractLeaseInfo(final AbfsRestOperation op) {
- leaseId = op.getResult().getResponseHeader(X_MS_LEASE_ID);
- leaseRenewLastEpoch = System.currentTimeMillis();
+ public AbfsBlobLease(final AbfsClient client,
+ final String path,
+ final int acquireMaxRetries,
+ final int acquireRetryInterval,
+ final Integer leaseDuration,
+ final TracingContext tracingContext) throws AzureBlobFileSystemException {
+ super(client, path, acquireMaxRetries, acquireRetryInterval, leaseDuration,
+ tracingContext);
}
- public String getLeaseId() {
- return leaseId;
+ @Override
+ String callRenewLeaseAPI(final String path,
+ final String leaseId,
+ final TracingContext tracingContext) throws AzureBlobFileSystemException {
+ return extractLeaseInfo(client.renewBlobLease(path, leaseId, tracingContext));
}
- public void renewIfRequired() throws AzureBlobFileSystemException {
- if (System.currentTimeMillis() - leaseRenewLastEpoch >= RENEW_TIME) {
- renew();
- }
+ @Override
+ AbfsRestOperation callAcquireLeaseAPI(final String path,
+ final Integer leaseDuration,
+ final TracingContext tracingContext) throws AzureBlobFileSystemException {
+ return client.acquireBlobLease(path, leaseDuration, tracingContext);
}
- private synchronized void renew() throws AzureBlobFileSystemException {
- if (System.currentTimeMillis() - leaseRenewLastEpoch < RENEW_TIME) {
- return;
- }
- AbfsRestOperation op = client.renewBlobLease(path, leaseId, tracingContext);
- extractLeaseInfo(op);
+ @Override
+ void callReleaseLeaseAPI(final String path,
+ final String leaseID,
+ final TracingContext tracingContext) throws AzureBlobFileSystemException {
+ client.releaseBlobLease(path, leaseID, tracingContext);
}
- public synchronized void free() throws AzureBlobFileSystemException {
- if (freed) {
- return;
- }
- client.releaseBlobLease(path, leaseId, tracingContext);
- freed = true;
+ private String extractLeaseInfo(final AbfsRestOperation op) {
+ return op.getResult().getResponseHeader(X_MS_LEASE_ID);
}
}
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java
index e961e13ac399ff..bb2cca57016185 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java
@@ -71,6 +71,7 @@
import org.apache.hadoop.security.ssl.DelegatingSSLSocketFactory;
import org.apache.hadoop.util.concurrent.HadoopExecutors;
+import static java.net.HttpURLConnection.HTTP_CONFLICT;
import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.*;
import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DEFAULT_DELETE_CONSIDERED_IDEMPOTENT;
import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.SERVER_SIDE_ENCRYPTION_ALGORITHM;
@@ -79,6 +80,7 @@
import static org.apache.hadoop.fs.azurebfs.constants.FileSystemUriSchemes.WASB_DNS_PREFIX;
import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.*;
import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.*;
+import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException;
/**
* AbfsClient.
@@ -102,6 +104,7 @@ public class AbfsClient implements Closeable {
private AccessTokenProvider tokenProvider;
private SASTokenProvider sasTokenProvider;
private final AbfsCounters abfsCounters;
+ private final AbfsThrottlingIntercept intercept;
private final ListeningScheduledExecutorService executorService;
@@ -117,6 +120,7 @@ private AbfsClient(final URL baseUrl, final SharedKeyCredentials sharedKeyCreden
this.retryPolicy = abfsClientContext.getExponentialRetryPolicy();
this.accountName = abfsConfiguration.getAccountName().substring(0, abfsConfiguration.getAccountName().indexOf(AbfsHttpConstants.DOT));
this.authType = abfsConfiguration.getAuthType(accountName);
+ this.intercept = AbfsThrottlingInterceptFactory.getInstance(accountName, abfsConfiguration);
String encryptionKey = this.abfsConfiguration
.getClientProvidedEncryptionKey();
@@ -222,6 +226,10 @@ SharedKeyCredentials getSharedKeyCredentials() {
return sharedKeyCredentials;
}
+ AbfsThrottlingIntercept getIntercept() {
+ return intercept;
+ }
+
List createDefaultHeaders() {
final List requestHeaders = new ArrayList();
requestHeaders.add(new AbfsHttpHeader(X_MS_VERSION, xMsVersion));
@@ -420,7 +428,7 @@ public AbfsRestOperation createPath(final String path, final boolean isFile, fin
if (!op.hasResult()) {
throw ex;
}
- if (!isFile && op.getResult().getStatusCode() == HttpURLConnection.HTTP_CONFLICT) {
+ if (!isFile && op.getResult().getStatusCode() == HTTP_CONFLICT) {
String existingResource =
op.getResult().getResponseHeader(X_MS_EXISTING_RESOURCE_TYPE);
if (existingResource != null && existingResource.equals(DIRECTORY)) {
@@ -471,7 +479,7 @@ public AbfsRestOperation createPathBlob(final String path, final boolean isFile,
if (!op.hasResult()) {
throw ex;
}
- if (!isFile && op.getResult().getStatusCode() == HttpURLConnection.HTTP_CONFLICT) {
+ if (!isFile && op.getResult().getStatusCode() == HTTP_CONFLICT) {
// This ensures that we don't throw ex only for existing directory but if a blob exists we throw exception.
tracingContext.setFallbackDFSAppend(tracingContext.getFallbackDFSAppend() + "M");
AbfsRestOperation blobProperty = getBlobProperty(new Path(path), tracingContext);
@@ -686,6 +694,9 @@ public AbfsRestOperation append(final String path, final byte[] buffer,
throws AzureBlobFileSystemException {
final List requestHeaders = createDefaultHeaders();
addCustomerProvidedKeyHeaders(requestHeaders);
+ if (reqParams.isExpectHeaderEnabled()) {
+ requestHeaders.add(new AbfsHttpHeader(EXPECT, HUNDRED_CONTINUE));
+ }
// JDK7 does not support PATCH, so to workaround the issue we will use
// PUT and specify the real method in the X-Http-Method-Override header.
requestHeaders.add(new AbfsHttpHeader(X_HTTP_METHOD_OVERRIDE,
@@ -714,19 +725,33 @@ public AbfsRestOperation append(final String path, final byte[] buffer,
if (url.toString().contains(WASB_DNS_PREFIX)) {
url = changePrefixFromBlobtoDfs(url);
}
- final AbfsRestOperation op = new AbfsRestOperation(
- AbfsRestOperationType.Append,
- this,
- HTTP_METHOD_PUT,
- url,
- requestHeaders,
- buffer,
- reqParams.getoffset(),
- reqParams.getLength(),
- sasTokenForReuse);
+ final AbfsRestOperation op = getAbfsRestOperationForAppend(AbfsRestOperationType.Append,
+ HTTP_METHOD_PUT,
+ url,
+ requestHeaders,
+ buffer,
+ reqParams.getoffset(),
+ reqParams.getLength(),
+ sasTokenForReuse);
try {
op.execute(tracingContext);
} catch (AzureBlobFileSystemException e) {
+ /*
+ If the http response code indicates a user error we retry
+ the same append request with expect header being disabled.
+ When "100-continue" header is enabled but a non Http 100 response comes,
+ the response message might not get set correctly by the server.
+ So, this handling is to avoid breaking of backward compatibility
+ if someone has taken dependency on the exception message,
+ which is created using the error string present in the response header.
+ */
+ int responseStatusCode = ((AbfsRestOperationException) e).getStatusCode();
+ if (checkUserError(responseStatusCode) && reqParams.isExpectHeaderEnabled()) {
+ LOG.debug("User error, retrying without 100 continue enabled for the given path {}", path);
+ reqParams.setExpectHeaderEnabled(false);
+ return this.append(path, buffer, reqParams, cachedSasToken,
+ tracingContext);
+ }
// If we have no HTTP response, throw the original exception.
if (!op.hasResult()) {
throw e;
@@ -734,16 +759,15 @@ public AbfsRestOperation append(final String path, final byte[] buffer,
if (reqParams.isAppendBlob()
&& appendSuccessCheckOp(op, path,
(reqParams.getPosition() + reqParams.getLength()), tracingContext)) {
- final AbfsRestOperation successOp = new AbfsRestOperation(
- AbfsRestOperationType.Append,
- this,
- HTTP_METHOD_PUT,
- url,
- requestHeaders,
- buffer,
- reqParams.getoffset(),
- reqParams.getLength(),
- sasTokenForReuse);
+ final AbfsRestOperation successOp = getAbfsRestOperationForAppend(
+ AbfsRestOperationType.Append,
+ HTTP_METHOD_PUT,
+ url,
+ requestHeaders,
+ buffer,
+ reqParams.getoffset(),
+ reqParams.getLength(),
+ sasTokenForReuse);
successOp.hardSetResult(HttpURLConnection.HTTP_OK);
return successOp;
}
@@ -773,7 +797,9 @@ public AbfsRestOperation append(final String blockId, final String path, final b
if (reqParams.getLeaseId() != null) {
requestHeaders.add(new AbfsHttpHeader(X_MS_LEASE_ID, reqParams.getLeaseId()));
}
-
+ if (reqParams.isExpectHeaderEnabled()) {
+ requestHeaders.add(new AbfsHttpHeader(EXPECT, HUNDRED_CONTINUE));
+ }
final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder();
abfsUriQueryBuilder.addQuery(QUERY_PARAM_COMP, BLOCK);
abfsUriQueryBuilder.addQuery(QUERY_PARAM_BLOCKID, blockId);
@@ -795,7 +821,29 @@ public AbfsRestOperation append(final String blockId, final String path, final b
reqParams.getoffset(),
reqParams.getLength(),
sasTokenForReuse);
- op.execute(tracingContext);
+ try {
+ op.execute(tracingContext);
+ } catch (AzureBlobFileSystemException e) {
+ /*
+ If the http response code indicates a user error we retry
+ the same append request with expect header being disabled.
+ When "100-continue" header is enabled but a non Http 100 response comes,
+ the response message might not get set correctly by the server.
+ So, this handling is to avoid breaking of backward compatibility
+ if someone has taken dependency on the exception message,
+ which is created using the error string present in the response header.
+ */
+ int responseStatusCode = ((AbfsRestOperationException) e).getStatusCode();
+ if (checkUserErrorBlob(responseStatusCode) && reqParams.isExpectHeaderEnabled()) {
+ LOG.debug("User error, retrying without 100 continue enabled for the given path {}", path);
+ reqParams.setExpectHeaderEnabled(false);
+ return this.append(blockId, path, buffer, reqParams, cachedSasToken,
+ tracingContext, eTag);
+ }
+ else {
+ throw e;
+ }
+ }
return op;
}
@@ -843,6 +891,61 @@ public AbfsRestOperation flush(byte[] buffer, final String path, boolean isClose
return op;
}
+ /**
+ * Creates the rest operation used by append.
+ * @param operationType The AbfsRestOperationType.
+ * @param httpMethod The http method passed to the operation.
+ * @param url The request url.
+ * @param requestHeaders The list of request headers.
+ * @param buffer The buffer handed to the operation.
+ * @param bufferOffset The buffer offset.
+ * @param bufferLength The buffer length.
+ * @param sasTokenForReuse The sasToken.
+ * @return a new AbfsRestOperation built from the given arguments.
+ */
+ @VisibleForTesting
+ AbfsRestOperation getAbfsRestOperationForAppend(final AbfsRestOperationType operationType,
+ final String httpMethod,
+ final URL url,
+ final List requestHeaders,
+ final byte[] buffer,
+ final int bufferOffset,
+ final int bufferLength,
+ final String sasTokenForReuse) {
+ return new AbfsRestOperation(
+ operationType,
+ this,
+ httpMethod,
+ url,
+ requestHeaders,
+ buffer,
+ bufferOffset,
+ bufferLength, sasTokenForReuse);
+ }
+
+ /**
+ * Tells whether the status code is a user error, i.e. in [HTTP_BAD_REQUEST, HTTP_INTERNAL_ERROR).
+ * @param responseStatusCode http response status code.
+ * @return true when the code lies in that range, false otherwise.
+ */
+ private boolean checkUserError(int responseStatusCode) {
+ final boolean belowServerError = responseStatusCode < HttpURLConnection.HTTP_INTERNAL_ERROR;
+ return belowServerError && responseStatusCode >= HttpURLConnection.HTTP_BAD_REQUEST;
+ }
+
+ /**
+ * Blob-endpoint variant of the user error check: same range as checkUserError,
+ * except that HTTP_CONFLICT is excluded. In the case of HTTP_CONFLICT for
+ * PutBlockList we fallback to DFS and hence this retry handling is not needed.
+ * @param responseStatusCode http response status code.
+ * @return true for a user error other than HTTP_CONFLICT, false otherwise.
+ */
+ private boolean checkUserErrorBlob(int responseStatusCode) {
+ final boolean isConflict
+ = responseStatusCode == HttpURLConnection.HTTP_CONFLICT;
+ return !isConflict && checkUserError(responseStatusCode);
+ }
+
// For AppendBlob its possible that the append succeeded in the backend but the request failed.
// However a retry would fail with an InvalidQueryParameterValue
// (as the current offset would be unacceptable).
@@ -1007,11 +1110,18 @@ public AbfsRestOperation read(final String path, final long position, final byte
abfsUriQueryBuilder, cachedSasToken);
URL url = createRequestUrl(path, abfsUriQueryBuilder.toString());
- if (url.toString().contains(WASB_DNS_PREFIX)) {
- url = changePrefixFromBlobtoDfs(url);
+ final AbfsRestOperationType opType;
+ if (!OperativeEndpoint.isReadEnabledOnDFS(
+ getAbfsConfiguration().getPrefixMode(), getAbfsConfiguration())) {
+ opType = AbfsRestOperationType.GetBlob;
+ } else {
+ if (url.toString().contains(WASB_DNS_PREFIX)) {
+ url = changePrefixFromBlobtoDfs(url);
+ }
+ opType = AbfsRestOperationType.ReadFile;
}
final AbfsRestOperation op = new AbfsRestOperation(
- AbfsRestOperationType.ReadFile,
+ opType,
this,
HTTP_METHOD_GET,
url,
@@ -1341,6 +1451,9 @@ public AbfsRestOperation getContainerProperty(TracingContext tracingContext) thr
final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder();
abfsUriQueryBuilder.addQuery(QUERY_PARAM_RESTYPE, CONTAINER);
+ appendSASTokenToQuery("",
+ SASTokenProvider.GET_CONTAINER_PROPERTIES_OPERATION, abfsUriQueryBuilder);
+
final URL url = createRequestUrl(abfsUriQueryBuilder.toString());
final AbfsRestOperation op = new AbfsRestOperation(
@@ -1353,6 +1466,71 @@ public AbfsRestOperation getContainerProperty(TracingContext tracingContext) thr
return op;
}
+ /**
+ * Gets user-defined properties(metadata) of the blob over blob endpoint.
+ * @param blobPath path of the blob; only its URI path component is used for the request.
+ * @param tracingContext tracing context handed to the operation when it is executed.
+ * @return the executed HEAD operation (presumably carrying the user-defined properties in its response; confirm against callers).
+ * @throws AzureBlobFileSystemException declared failure type; presumably raised when building or executing the request fails.
+ */
+ public AbfsRestOperation getBlobMetadata(Path blobPath,
+ TracingContext tracingContext) throws AzureBlobFileSystemException {
+ final List requestHeaders = createDefaultHeaders();
+ AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder();
+ abfsUriQueryBuilder.addQuery(QUERY_PARAM_COMP, QUERY_PARAM_INCLUDE_VALUE_METADATA);
+
+ String blobRelativePath = blobPath.toUri().getPath();
+ appendSASTokenToQuery(blobRelativePath,
+ SASTokenProvider.GET_BLOB_METADATA_OPERATION, abfsUriQueryBuilder);
+
+ final URL url = createRequestUrl(blobRelativePath,
+ abfsUriQueryBuilder.toString());
+
+ final AbfsRestOperation op = new AbfsRestOperation(
+ AbfsRestOperationType.GetBlobMetadata,
+ this,
+ HTTP_METHOD_HEAD,
+ url,
+ requestHeaders);
+ op.execute(tracingContext);
+ return op;
+ }
+
+ /**
+ * Sets user-defined properties(metadata) of the blob over blob endpoint and returns the executed PUT operation.
+ * @param blobPath path of the blob; only its URI path component is used for the request.
+ * @param metadataRequestHeaders metadata headers, appended after the default request headers.
+ * @param tracingContext tracing context handed to the operation when it is executed.
+ * @throws AzureBlobFileSystemException declared failure type; presumably raised when building or executing the request fails.
+ */
+ public AbfsRestOperation setBlobMetadata(Path blobPath, List metadataRequestHeaders,
+ TracingContext tracingContext) throws AzureBlobFileSystemException {
+ // Request Header for this call will also contain metadata headers
+ final List defaultRequestHeaders = createDefaultHeaders();
+ final List requestHeaders = new ArrayList();
+ requestHeaders.addAll(defaultRequestHeaders);
+ requestHeaders.addAll(metadataRequestHeaders);
+
+ AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder();
+ abfsUriQueryBuilder.addQuery(QUERY_PARAM_COMP, QUERY_PARAM_INCLUDE_VALUE_METADATA);
+
+ String blobRelativePath = blobPath.toUri().getPath();
+ appendSASTokenToQuery(blobRelativePath,
+ SASTokenProvider.SET_BLOB_METADATA_OPERATION, abfsUriQueryBuilder);
+
+ final URL url = createRequestUrl(blobRelativePath,
+ abfsUriQueryBuilder.toString());
+
+ final AbfsRestOperation op = new AbfsRestOperation(
+ AbfsRestOperationType.SetBlobMetadata,
+ this,
+ HTTP_METHOD_PUT,
+ url,
+ requestHeaders);
+ op.execute(tracingContext);
+ return op;
+ }
+
/**
* Call server API BlobList.
*
@@ -1658,4 +1836,9 @@ public void addCallback(ListenableFuture future, FutureCallback callba
AbfsConfiguration getAbfsConfiguration() {
return abfsConfiguration;
}
+
+ @VisibleForTesting
+ protected AccessTokenProvider getTokenProvider() {
+ return tokenProvider;
+ }
}
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientThrottlingAnalyzer.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientThrottlingAnalyzer.java
index a55c924dd81524..2060de6f14a97e 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientThrottlingAnalyzer.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientThrottlingAnalyzer.java
@@ -20,27 +20,30 @@
import java.util.Timer;
import java.util.TimerTask;
+import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference;
-import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting;
-import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions;
+import org.apache.hadoop.classification.VisibleForTesting;
+import org.apache.hadoop.fs.azurebfs.AbfsConfiguration;
+import org.apache.hadoop.util.Preconditions;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import static org.apache.hadoop.util.Time.now;
+
class AbfsClientThrottlingAnalyzer {
private static final Logger LOG = LoggerFactory.getLogger(
- AbfsClientThrottlingAnalyzer.class);
- private static final int DEFAULT_ANALYSIS_PERIOD_MS = 10 * 1000;
+ AbfsClientThrottlingAnalyzer.class);
private static final int MIN_ANALYSIS_PERIOD_MS = 1000;
private static final int MAX_ANALYSIS_PERIOD_MS = 30000;
private static final double MIN_ACCEPTABLE_ERROR_PERCENTAGE = .1;
private static final double MAX_EQUILIBRIUM_ERROR_PERCENTAGE = 1;
private static final double RAPID_SLEEP_DECREASE_FACTOR = .75;
private static final double RAPID_SLEEP_DECREASE_TRANSITION_PERIOD_MS = 150
- * 1000;
+ * 1000;
private static final double SLEEP_DECREASE_FACTOR = .975;
private static final double SLEEP_INCREASE_FACTOR = 1.05;
private int analysisPeriodMs;
@@ -50,49 +53,86 @@ class AbfsClientThrottlingAnalyzer {
private String name = null;
private Timer timer = null;
private AtomicReference blobMetrics = null;
+ private AtomicLong lastExecutionTime = null;
+ private final AtomicBoolean isOperationOnAccountIdle = new AtomicBoolean(false);
+ private AbfsConfiguration abfsConfiguration = null;
+ private boolean accountLevelThrottlingEnabled = true;
private AbfsClientThrottlingAnalyzer() {
// hide default constructor
}
- /**
- * Creates an instance of the AbfsClientThrottlingAnalyzer class with
- * the specified name.
- *
- * @param name a name used to identify this instance.
- * @throws IllegalArgumentException if name is null or empty.
- */
- AbfsClientThrottlingAnalyzer(String name) throws IllegalArgumentException {
- this(name, DEFAULT_ANALYSIS_PERIOD_MS);
- }
-
/**
* Creates an instance of the AbfsClientThrottlingAnalyzer class with
* the specified name and period.
*
* @param name A name used to identify this instance.
- * @param period The frequency, in milliseconds, at which metrics are
- * analyzed.
+ * @param abfsConfiguration The configuration set.
* @throws IllegalArgumentException If name is null or empty.
* If period is less than 1000 or greater than 30000 milliseconds.
*/
- AbfsClientThrottlingAnalyzer(String name, int period)
- throws IllegalArgumentException {
+ AbfsClientThrottlingAnalyzer(String name, AbfsConfiguration abfsConfiguration)
+ throws IllegalArgumentException {
Preconditions.checkArgument(
- StringUtils.isNotEmpty(name),
- "The argument 'name' cannot be null or empty.");
+ StringUtils.isNotEmpty(name),
+ "The argument 'name' cannot be null or empty.");
+ int period = abfsConfiguration.getAnalysisPeriod();
Preconditions.checkArgument(
- period >= MIN_ANALYSIS_PERIOD_MS && period <= MAX_ANALYSIS_PERIOD_MS,
- "The argument 'period' must be between 1000 and 30000.");
+ period >= MIN_ANALYSIS_PERIOD_MS && period <= MAX_ANALYSIS_PERIOD_MS,
+ "The argument 'period' must be between 1000 and 30000.");
this.name = name;
- this.analysisPeriodMs = period;
+ this.abfsConfiguration = abfsConfiguration;
+ this.accountLevelThrottlingEnabled = abfsConfiguration.accountThrottlingEnabled();
+ this.analysisPeriodMs = abfsConfiguration.getAnalysisPeriod();
+ this.lastExecutionTime = new AtomicLong(now());
this.blobMetrics = new AtomicReference(
- new AbfsOperationMetrics(System.currentTimeMillis()));
+ new AbfsOperationMetrics(System.currentTimeMillis()));
this.timer = new Timer(
- String.format("abfs-timer-client-throttling-analyzer-%s", name), true);
+ String.format("abfs-timer-client-throttling-analyzer-%s", name), true);
this.timer.schedule(new TimerTaskImpl(),
- analysisPeriodMs,
- analysisPeriodMs);
+ analysisPeriodMs,
+ analysisPeriodMs);
+ }
+
+ /**
+ * Re-arms the analyzer: installs fresh metrics, schedules a new TimerTaskImpl and clears the idle flag.
+ */
+ private void resumeTimer() { // performs no idle check itself; the visible caller (timerOrchestrator) checks the flag first
+ blobMetrics = new AtomicReference(
+ new AbfsOperationMetrics(System.currentTimeMillis()));
+ timer.schedule(new TimerTaskImpl(),
+ analysisPeriodMs,
+ analysisPeriodMs);
+ isOperationOnAccountIdle.set(false); // cleared only after the new task has been scheduled
+ }
+
+ /**
+ * Synchronized method to suspend or resume the analysis timer.
+ * @param timerFunctionality RESUME re-arms the timer if the account is marked idle; SUSPEND cancels the task when account-level throttling is enabled and the idle timeout has elapsed.
+ * @param timerTask the task to cancel on SUSPEND; not used on RESUME (the visible caller passes null).
+ * @return true only when SUSPEND actually cancelled the task; false in every other case.
+ */
+ private synchronized boolean timerOrchestrator(TimerFunctionality timerFunctionality,
+ TimerTask timerTask) {
+ switch (timerFunctionality) {
+ case RESUME:
+ if (isOperationOnAccountIdle.get()) {
+ resumeTimer();
+ }
+ break;
+ case SUSPEND:
+ if (accountLevelThrottlingEnabled && (System.currentTimeMillis()
+ - lastExecutionTime.get() >= getOperationIdleTimeout())) {
+ isOperationOnAccountIdle.set(true); // marks the account idle so the next RESUME re-arms the timer
+ timerTask.cancel();
+ timer.purge();
+ return true;
+ }
+ break;
+ default:
+ break;
+ }
+ return false;
+ }
/**
@@ -104,12 +144,13 @@ private AbfsClientThrottlingAnalyzer() {
public void addBytesTransferred(long count, boolean isFailedOperation) {
AbfsOperationMetrics metrics = blobMetrics.get();
if (isFailedOperation) {
- metrics.bytesFailed.addAndGet(count);
- metrics.operationsFailed.incrementAndGet();
+ metrics.addBytesFailed(count);
+ metrics.incrementOperationsFailed();
} else {
- metrics.bytesSuccessful.addAndGet(count);
- metrics.operationsSuccessful.incrementAndGet();
+ metrics.addBytesSuccessful(count);
+ metrics.incrementOperationsSuccessful();
}
+ // Deliberately no blobMetrics.set(metrics): metrics is the instance blobMetrics already held, and re-storing it could overwrite a fresh instance swapped in concurrently by the timer task.
}
/**
@@ -117,6 +158,8 @@ public void addBytesTransferred(long count, boolean isFailedOperation) {
* @return true if Thread sleeps(Throttling occurs) else false.
*/
public boolean suspendIfNecessary() {
+ lastExecutionTime.set(now());
+ timerOrchestrator(TimerFunctionality.RESUME, null);
int duration = sleepDuration;
if (duration > 0) {
try {
@@ -134,19 +177,27 @@ int getSleepDuration() {
return sleepDuration;
}
+ int getOperationIdleTimeout() {
+ return abfsConfiguration.getAccountOperationIdleTimeout();
+ }
+
+ AtomicBoolean getIsOperationOnAccountIdle() {
+ return isOperationOnAccountIdle;
+ }
+
private int analyzeMetricsAndUpdateSleepDuration(AbfsOperationMetrics metrics,
int sleepDuration) {
final double percentageConversionFactor = 100;
- double bytesFailed = metrics.bytesFailed.get();
- double bytesSuccessful = metrics.bytesSuccessful.get();
- double operationsFailed = metrics.operationsFailed.get();
- double operationsSuccessful = metrics.operationsSuccessful.get();
+ double bytesFailed = metrics.getBytesFailed().get();
+ double bytesSuccessful = metrics.getBytesSuccessful().get();
+ double operationsFailed = metrics.getOperationsFailed().get();
+ double operationsSuccessful = metrics.getOperationsSuccessful().get();
double errorPercentage = (bytesFailed <= 0)
- ? 0
- : (percentageConversionFactor
- * bytesFailed
- / (bytesFailed + bytesSuccessful));
- long periodMs = metrics.endTime - metrics.startTime;
+ ? 0
+ : (percentageConversionFactor
+ * bytesFailed
+ / (bytesFailed + bytesSuccessful));
+ long periodMs = metrics.getEndTime() - metrics.getStartTime();
double newSleepDuration;
@@ -154,10 +205,10 @@ private int analyzeMetricsAndUpdateSleepDuration(AbfsOperationMetrics metrics,
++consecutiveNoErrorCount;
// Decrease sleepDuration in order to increase throughput.
double reductionFactor =
- (consecutiveNoErrorCount * analysisPeriodMs
- >= RAPID_SLEEP_DECREASE_TRANSITION_PERIOD_MS)
- ? RAPID_SLEEP_DECREASE_FACTOR
- : SLEEP_DECREASE_FACTOR;
+ (consecutiveNoErrorCount * analysisPeriodMs
+ >= RAPID_SLEEP_DECREASE_TRANSITION_PERIOD_MS)
+ ? RAPID_SLEEP_DECREASE_FACTOR
+ : SLEEP_DECREASE_FACTOR;
newSleepDuration = sleepDuration * reductionFactor;
} else if (errorPercentage < MAX_EQUILIBRIUM_ERROR_PERCENTAGE) {
@@ -176,15 +227,15 @@ private int analyzeMetricsAndUpdateSleepDuration(AbfsOperationMetrics metrics,
double additionalDelayNeeded = 5 * analysisPeriodMs;
if (bytesSuccessful > 0) {
additionalDelayNeeded = (bytesSuccessful + bytesFailed)
- * periodMs
- / bytesSuccessful
- - periodMs;
+ * periodMs
+ / bytesSuccessful
+ - periodMs;
}
// amortize the additional delay needed across the estimated number of
// requests during the next period
newSleepDuration = additionalDelayNeeded
- / (operationsFailed + operationsSuccessful);
+ / (operationsFailed + operationsSuccessful);
final double maxSleepDuration = analysisPeriodMs;
final double minSleepDuration = sleepDuration * SLEEP_INCREASE_FACTOR;
@@ -201,16 +252,16 @@ private int analyzeMetricsAndUpdateSleepDuration(AbfsOperationMetrics metrics,
if (LOG.isDebugEnabled()) {
LOG.debug(String.format(
- "%5.5s, %10d, %10d, %10d, %10d, %6.2f, %5d, %5d, %5d",
- name,
- (int) bytesFailed,
- (int) bytesSuccessful,
- (int) operationsFailed,
- (int) operationsSuccessful,
- errorPercentage,
- periodMs,
- (int) sleepDuration,
- (int) newSleepDuration));
+ "%5.5s, %10d, %10d, %10d, %10d, %6.2f, %5d, %5d, %5d",
+ name,
+ (int) bytesFailed,
+ (int) bytesSuccessful,
+ (int) operationsFailed,
+ (int) operationsSuccessful,
+ errorPercentage,
+ periodMs,
+ (int) sleepDuration,
+ (int) newSleepDuration));
}
return (int) newSleepDuration;
@@ -238,12 +289,15 @@ public void run() {
}
long now = System.currentTimeMillis();
- if (now - blobMetrics.get().startTime >= analysisPeriodMs) {
+ if (timerOrchestrator(TimerFunctionality.SUSPEND, this)) {
+ return;
+ }
+ if (now - blobMetrics.get().getStartTime() >= analysisPeriodMs) {
AbfsOperationMetrics oldMetrics = blobMetrics.getAndSet(
- new AbfsOperationMetrics(now));
- oldMetrics.endTime = now;
+ new AbfsOperationMetrics(now));
+ oldMetrics.setEndTime(now);
sleepDuration = analyzeMetricsAndUpdateSleepDuration(oldMetrics,
- sleepDuration);
+ sleepDuration);
}
} finally {
if (doWork) {
@@ -252,24 +306,4 @@ public void run() {
}
}
}
-
- /**
- * Stores Abfs operation metrics during each analysis period.
- */
- static class AbfsOperationMetrics {
- private AtomicLong bytesFailed;
- private AtomicLong bytesSuccessful;
- private AtomicLong operationsFailed;
- private AtomicLong operationsSuccessful;
- private long endTime;
- private long startTime;
-
- AbfsOperationMetrics(long startTime) {
- this.startTime = startTime;
- this.bytesFailed = new AtomicLong();
- this.bytesSuccessful = new AtomicLong();
- this.operationsFailed = new AtomicLong();
- this.operationsSuccessful = new AtomicLong();
- }
- }
}
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientThrottlingIntercept.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientThrottlingIntercept.java
index 7303e833418db7..3bb225d4be862d 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientThrottlingIntercept.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientThrottlingIntercept.java
@@ -19,13 +19,17 @@
package org.apache.hadoop.fs.azurebfs.services;
import java.net.HttpURLConnection;
+import java.util.concurrent.locks.ReentrantLock;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import org.apache.hadoop.fs.azurebfs.AbfsConfiguration;
import org.apache.hadoop.fs.azurebfs.AbfsStatistic;
import org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations;
+import static java.net.HttpURLConnection.HTTP_UNAVAILABLE;
+
/**
* Throttles Azure Blob File System read and write operations to achieve maximum
* throughput by minimizing errors. The errors occur when the account ingress
@@ -38,35 +42,101 @@
* and sleeps just enough to minimize errors, allowing optimal ingress and/or
* egress throughput.
*/
-public final class AbfsClientThrottlingIntercept {
+public final class AbfsClientThrottlingIntercept implements AbfsThrottlingIntercept {
private static final Logger LOG = LoggerFactory.getLogger(
AbfsClientThrottlingIntercept.class);
private static final String RANGE_PREFIX = "bytes=";
- private static AbfsClientThrottlingIntercept singleton = null;
- private AbfsClientThrottlingAnalyzer readThrottler = null;
- private AbfsClientThrottlingAnalyzer writeThrottler = null;
- private static boolean isAutoThrottlingEnabled = false;
+ private static volatile AbfsClientThrottlingIntercept singleton; // shared instance, created lazily by initializeSingleton(); volatile because it is first read outside LOCK
+ private static final ReentrantLock LOCK = new ReentrantLock();
+ private final AbfsClientThrottlingAnalyzer readThrottler;
+ private final AbfsClientThrottlingAnalyzer writeThrottler;
+ private final String accountName;
// Hide default constructor
- private AbfsClientThrottlingIntercept() {
- readThrottler = new AbfsClientThrottlingAnalyzer("read");
- writeThrottler = new AbfsClientThrottlingAnalyzer("write");
+ public AbfsClientThrottlingIntercept(String accountName, AbfsConfiguration abfsConfiguration) {
+ this.accountName = accountName;
+ this.readThrottler = setAnalyzer("read " + accountName, abfsConfiguration);
+ this.writeThrottler = setAnalyzer("write " + accountName, abfsConfiguration);
+ LOG.debug("Client-side throttling is enabled for the ABFS file system for the account : {}", accountName);
}
- public static synchronized void initializeSingleton(boolean enableAutoThrottling) {
- if (!enableAutoThrottling) {
- return;
- }
+ // Private constructor for the shared (singleton) intercept; invoked from initializeSingleton().
+ private AbfsClientThrottlingIntercept(AbfsConfiguration abfsConfiguration) {
+ // Account name is kept empty because the same instance is shared across all accounts.
+ this.accountName = "";
+ this.readThrottler = setAnalyzer("read", abfsConfiguration);
+ this.writeThrottler = setAnalyzer("write", abfsConfiguration);
+ LOG.debug("Client-side throttling is enabled for the ABFS file system using singleton intercept");
+ }
+
+ /**
+ * Creates a new analyzer for the intercept; nothing is stored here, the caller assigns the result.
+ * @param name Name of the analyzer.
+ * @param abfsConfiguration The configuration.
+ * @return a newly constructed AbfsClientThrottlingAnalyzer instance.
+ */
+ private AbfsClientThrottlingAnalyzer setAnalyzer(String name, AbfsConfiguration abfsConfiguration) {
+ return new AbfsClientThrottlingAnalyzer(name, abfsConfiguration);
+ }
+
+ /**
+ * Returns the analyzer for read operations.
+ * @return AbfsClientThrottlingAnalyzer for read.
+ */
+ AbfsClientThrottlingAnalyzer getReadThrottler() {
+ return readThrottler;
+ }
+
+ /**
+ * Returns the analyzer for write operations.
+ * @return AbfsClientThrottlingAnalyzer for write.
+ */
+ AbfsClientThrottlingAnalyzer getWriteThrottler() {
+ return writeThrottler;
+ }
+
+ /**
+ * Creates a singleton object of the AbfsClientThrottlingIntercept.
+ * which is shared across all filesystem instances.
+ * @param abfsConfiguration configuration set.
+ * @return singleton object of intercept.
+ */
+ static AbfsClientThrottlingIntercept initializeSingleton(AbfsConfiguration abfsConfiguration) {
if (singleton == null) {
- singleton = new AbfsClientThrottlingIntercept();
- isAutoThrottlingEnabled = true;
- LOG.debug("Client-side throttling is enabled for the ABFS file system.");
+ LOCK.lock();
+ try {
+ if (singleton == null) {
+ singleton = new AbfsClientThrottlingIntercept(abfsConfiguration);
+ LOG.debug("Client-side throttling is enabled for the ABFS file system.");
+ }
+ } finally {
+ LOCK.unlock();
+ }
}
+ return singleton;
+ }
+
+ /**
+ * Tells whether a throttled operation still had bytes it expected to send,
+ * so the caller can account for them; this method itself updates no metric.
+ * @param isThrottledOperation true if the status code is HTTP_UNAVAILABLE.
+ * @param abfsHttpOperation Used for the expected bytes to be sent.
+ * @return true if the operation is throttled and has some bytes to transfer.
+ */
+ private boolean updateBytesTransferred(boolean isThrottledOperation,
+ AbfsHttpOperation abfsHttpOperation) {
+ return isThrottledOperation && abfsHttpOperation.getExpectedBytesToBeSent() > 0;
}
- static void updateMetrics(AbfsRestOperationType operationType,
- AbfsHttpOperation abfsHttpOperation) {
- if (!isAutoThrottlingEnabled || abfsHttpOperation == null) {
+ /**
+ * Updates the metrics for successful and failed read and write operations.
+ * @param operationType Only applicable for read and write operations.
+ * @param abfsHttpOperation Used for status code and data transferred.
+ */
+ @Override
+ public void updateMetrics(AbfsRestOperationType operationType,
+ AbfsHttpOperation abfsHttpOperation) {
+ if (abfsHttpOperation == null) {
return;
}
@@ -78,11 +148,24 @@ static void updateMetrics(AbfsRestOperationType operationType,
boolean isFailedOperation = (status < HttpURLConnection.HTTP_OK
|| status >= HttpURLConnection.HTTP_INTERNAL_ERROR);
+ // If status code is 503, it is considered as a throttled operation.
+ boolean isThrottledOperation = (status == HTTP_UNAVAILABLE);
+
switch (operationType) {
case Append:
contentLength = abfsHttpOperation.getBytesSent();
+ if (contentLength == 0) {
+ /*
+ Signifies the case where we could not update the bytesSent due to
+ throttling but there were some expectedBytesToBeSent.
+ */
+ if (updateBytesTransferred(isThrottledOperation, abfsHttpOperation)) {
+ LOG.debug("Updating metrics due to throttling for path {}", abfsHttpOperation.getConnUrl().getPath());
+ contentLength = abfsHttpOperation.getExpectedBytesToBeSent();
+ }
+ }
if (contentLength > 0) {
- singleton.writeThrottler.addBytesTransferred(contentLength,
+ writeThrottler.addBytesTransferred(contentLength,
isFailedOperation);
}
break;
@@ -90,7 +173,7 @@ static void updateMetrics(AbfsRestOperationType operationType,
String range = abfsHttpOperation.getConnection().getRequestProperty(HttpHeaderConfigurations.RANGE);
contentLength = getContentLengthIfKnown(range);
if (contentLength > 0) {
- singleton.readThrottler.addBytesTransferred(contentLength,
+ readThrottler.addBytesTransferred(contentLength,
isFailedOperation);
}
break;
@@ -104,21 +187,18 @@ static void updateMetrics(AbfsRestOperationType operationType,
* uses this to suspend the request, if necessary, to minimize errors and
* maximize throughput.
*/
- static void sendingRequest(AbfsRestOperationType operationType,
+ @Override
+ public void sendingRequest(AbfsRestOperationType operationType,
AbfsCounters abfsCounters) {
- if (!isAutoThrottlingEnabled) {
- return;
- }
-
switch (operationType) {
case ReadFile:
- if (singleton.readThrottler.suspendIfNecessary()
+ if (readThrottler.suspendIfNecessary()
&& abfsCounters != null) {
abfsCounters.incrementCounter(AbfsStatistic.READ_THROTTLES, 1);
}
break;
case Append:
- if (singleton.writeThrottler.suspendIfNecessary()
+ if (writeThrottler.suspendIfNecessary()
&& abfsCounters != null) {
abfsCounters.incrementCounter(AbfsStatistic.WRITE_THROTTLES, 1);
}
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsLease.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsLease.java
new file mode 100644
index 00000000000000..f72658fb789aa5
--- /dev/null
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsLease.java
@@ -0,0 +1,66 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.azurebfs.services;
+
+import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants;
+import org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations;
+import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException;
+import org.apache.hadoop.fs.azurebfs.utils.TracingContext;
+
+public class AbfsDfsLease extends AbfsLease { // AbfsLease whose acquire/renew/release hooks delegate to the AbfsClient lease methods
+
+ public AbfsDfsLease(final AbfsClient client, // all arguments are forwarded unchanged to the AbfsLease constructor
+ final String path,
+ final int acquireMaxRetries,
+ final int acquireRetryInterval,
+ final Integer leaseDuration,
+ final TracingContext tracingContext) throws AzureBlobFileSystemException {
+ super(client, path, acquireMaxRetries, acquireRetryInterval, leaseDuration,
+ tracingContext);
+ }
+
+ public AbfsDfsLease(final AbfsClient client, // variant without explicit retry settings; forwarded unchanged to AbfsLease
+ final String path,
+ final Integer leaseDuration,
+ final TracingContext tracingContext) throws AzureBlobFileSystemException {
+ super(client, path, leaseDuration, tracingContext);
+ }
+
+ @Override
+ String callRenewLeaseAPI(final String path, // renews via client.renewLease and returns the X_MS_LEASE_ID response header
+ final String leaseId,
+ final TracingContext tracingContext) throws AzureBlobFileSystemException {
+ AbfsRestOperation op = client.renewLease(path, leaseId, tracingContext);
+ return op.getResult().getResponseHeader(HttpHeaderConfigurations.X_MS_LEASE_ID);
+ }
+
+ @Override
+ AbfsRestOperation callAcquireLeaseAPI(final String path, final Integer leaseDuration, // acquires via client.acquireLease and returns its operation
+ final TracingContext tracingContext)
+ throws AzureBlobFileSystemException {
+ return client.acquireLease(path,
+ leaseDuration, tracingContext);
+ }
+
+ @Override
+ void callReleaseLeaseAPI(final String path, final String leaseID, final TracingContext tracingContext) // releases via client.releaseLease
+ throws AzureBlobFileSystemException {
+ client.releaseLease(path, leaseID, tracingContext);
+ }
+}
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java
index 584f034837f0e6..7193c20137c182 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java
@@ -27,6 +27,7 @@
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
+import java.util.Map;
import javax.net.ssl.HttpsURLConnection;
import javax.net.ssl.SSLSocketFactory;
@@ -65,6 +66,9 @@
import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_COMP;
import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.COMP_LIST;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HUNDRED_CONTINUE;
+import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.EXPECT;
+
/**
* Represents an HTTP operation.
*/
@@ -95,6 +99,7 @@ public class AbfsHttpOperation implements AbfsPerfLoggable {
// metrics
private int bytesSent;
+ private int expectedBytesToBeSent;
private long bytesReceived;
// optional trace enabled metrics
@@ -199,6 +204,10 @@ public int getBytesSent() {
return bytesSent;
}
+ public int getExpectedBytesToBeSent() {
+ return expectedBytesToBeSent;
+ }
+
public long getBytesReceived() {
return bytesReceived;
}
@@ -211,6 +220,10 @@ public String getResponseHeader(String httpHeader) {
return connection.getHeaderField(httpHeader);
}
+ public Map> getResponseHeaders() {
+ return connection.getHeaderFields();
+ }
+
public List getBlockIdList() {
return blockIdList;
}
@@ -334,7 +347,7 @@ public AbfsHttpOperation(final URL url, final String method, final List leaseID = new AtomicReference<>();
private volatile Throwable exception = null;
private volatile int acquireRetryCount = 0;
private volatile ListenableScheduledFuture future = null;
+ private final Integer leaseDuration;
+
+ private Timer timer = null;
public static class LeaseException extends AzureBlobFileSystemException {
public LeaseException(Throwable t) {
@@ -81,20 +87,35 @@ public LeaseException(String s) {
}
}
- public AbfsLease(AbfsClient client, String path, TracingContext tracingContext) throws AzureBlobFileSystemException {
+ /**
+ * @param client client object for making server calls
+ * @param path path on which lease has to be acquired, renewed and freed in future
+ * @param leaseDuration duration for which lease to be taken in seconds
+ * @param tracingContext for tracing server calls
+ *
+ * @throws AzureBlobFileSystemException exception while calling acquireLease API
+ */
+ public AbfsLease(AbfsClient client, String path,
+ final Integer leaseDuration,
+ TracingContext tracingContext) throws AzureBlobFileSystemException {
this(client, path, DEFAULT_LEASE_ACQUIRE_MAX_RETRIES,
- DEFAULT_LEASE_ACQUIRE_RETRY_INTERVAL, tracingContext);
+ DEFAULT_LEASE_ACQUIRE_RETRY_INTERVAL, leaseDuration, tracingContext);
}
@VisibleForTesting
public AbfsLease(AbfsClient client, String path, int acquireMaxRetries,
- int acquireRetryInterval, TracingContext tracingContext) throws AzureBlobFileSystemException {
+ int acquireRetryInterval, final Integer leaseDuration, TracingContext tracingContext) throws AzureBlobFileSystemException {
this.leaseFreed = false;
this.client = client;
this.path = path;
this.tracingContext = tracingContext;
+ this.leaseDuration = leaseDuration;
- if (client.getNumLeaseThreads() < 1) {
+ /*
+ * Fail if no threads are configured for lease operations on infinite-lease directories
+ * while the object is being created for an infinite lease (leaseDuration == null).
+ */
+ if (client.getNumLeaseThreads() < 1 && leaseDuration == null) {
throw new LeaseException(ERR_NO_LEASE_THREADS);
}
@@ -104,7 +125,7 @@ public AbfsLease(AbfsClient client, String path, int acquireMaxRetries,
acquireLease(retryPolicy, 0, acquireRetryInterval, 0,
new TracingContext(tracingContext));
- while (leaseID == null && exception == null) {
+ while (leaseID.get() == null && exception == null) {
try {
future.get();
} catch (Exception e) {
@@ -122,18 +143,23 @@ public AbfsLease(AbfsClient client, String path, int acquireMaxRetries,
private void acquireLease(RetryPolicy retryPolicy, int numRetries,
int retryInterval, long delay, TracingContext tracingContext)
- throws LeaseException {
+ throws AzureBlobFileSystemException {
LOG.debug("Attempting to acquire lease on {}, retry {}", path, numRetries);
if (future != null && !future.isDone()) {
throw new LeaseException(ERR_LEASE_FUTURE_EXISTS);
}
- future = client.schedule(() -> client.acquireLease(path,
- INFINITE_LEASE_DURATION, tracingContext),
+ if(leaseDuration != null) {
+ leaseID.set(callAcquireLeaseAPI(path, leaseDuration, tracingContext).getResult().getResponseHeader(HttpHeaderConfigurations.X_MS_LEASE_ID));
+ spawnLeaseRenewTimer(path, leaseDuration * 1000);
+ return;
+ }
+ future = client.schedule(() -> callAcquireLeaseAPI(path,
+ INFINITE_LEASE_DURATION, tracingContext),
delay, TimeUnit.SECONDS);
client.addCallback(future, new FutureCallback() {
@Override
public void onSuccess(@Nullable AbfsRestOperation op) {
- leaseID = op.getResult().getResponseHeader(HttpHeaderConfigurations.X_MS_LEASE_ID);
+ leaseID.set(op.getResult().getResponseHeader(HttpHeaderConfigurations.X_MS_LEASE_ID));
LOG.debug("Acquired lease {} on {}", leaseID, path);
}
@@ -156,6 +182,29 @@ public void onFailure(Throwable throwable) {
});
}
+ private void spawnLeaseRenewTimer(String path, Integer leaseDuration) { // leaseDuration is in milliseconds here (the visible caller passes seconds * 1000)
+ timer = new Timer(String.format("abfs-lease-renew-timer-%s", path), true); // named daemon thread, so an unreleased lease cannot keep the JVM alive
+ timer.schedule(new TimerTask() {
+ @Override
+ public void run() {
+ try {
+ leaseID.set(callRenewLeaseAPI(path, leaseID.get(), tracingContext)); // store the id returned by the renew call
+ } catch (AzureBlobFileSystemException e) {
+ throw new RuntimeException(e); // an uncaught exception terminates the timer thread, so no further renewals are attempted
+ }
+ }
+ }, leaseDuration / 2, leaseDuration / 2); // first renewal after half the lease duration, then every half duration
+ }
+
+ abstract String callRenewLeaseAPI(final String path, // hook for the renew-lease server call; the renew timer stores the returned String as the lease id
+ final String leaseId, // id of the lease to renew
+ final TracingContext tracingContext) throws AzureBlobFileSystemException;
+
+ abstract AbfsRestOperation callAcquireLeaseAPI(final String path,
+ final Integer leaseDuration,
+ final TracingContext tracingContext)
+ throws AzureBlobFileSystemException;
+
/**
* Cancel future and free the lease. If an exception occurs while releasing the lease, the error
* will be logged. If the lease cannot be released, AzureBlobFileSystem breakLease will need to
@@ -170,9 +219,12 @@ public void free() {
if (future != null && !future.isDone()) {
future.cancel(true);
}
+ if (timer != null) {
+ timer.cancel();
+ }
TracingContext tracingContext = new TracingContext(this.tracingContext);
tracingContext.setOperation(FSOperationType.RELEASE_LEASE);
- client.releaseLease(path, leaseID, tracingContext);
+ callReleaseLeaseAPI(path, leaseID.get(), tracingContext);
} catch (IOException e) {
LOG.warn("Exception when trying to release lease {} on {}. Lease will need to be broken: {}",
leaseID, path, e.getMessage());
@@ -184,12 +236,15 @@ public void free() {
}
}
+ abstract void callReleaseLeaseAPI(final String path, final String leaseID, final TracingContext tracingContext)
+ throws AzureBlobFileSystemException;
+
public boolean isFreed() {
return leaseFreed;
}
public String getLeaseID() {
- return leaseID;
+ return leaseID.get();
}
@VisibleForTesting
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsNoOpThrottlingIntercept.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsNoOpThrottlingIntercept.java
new file mode 100644
index 00000000000000..b88f4a05d337b4
--- /dev/null
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsNoOpThrottlingIntercept.java
@@ -0,0 +1,37 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.azurebfs.services;
+
+final class AbfsNoOpThrottlingIntercept implements AbfsThrottlingIntercept {
+
+ public static final AbfsNoOpThrottlingIntercept INSTANCE = new AbfsNoOpThrottlingIntercept();
+
+ private AbfsNoOpThrottlingIntercept() {
+ }
+
+ @Override
+ public void updateMetrics(final AbfsRestOperationType operationType,
+ final AbfsHttpOperation abfsHttpOperation) {
+ }
+
+ @Override
+ public void sendingRequest(final AbfsRestOperationType operationType,
+ final AbfsCounters abfsCounters) {
+ }
+}
\ No newline at end of file
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOperationMetrics.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOperationMetrics.java
new file mode 100644
index 00000000000000..d19c69354a2d33
--- /dev/null
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOperationMetrics.java
@@ -0,0 +1,139 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.azurebfs.services;
+
+import java.util.concurrent.atomic.AtomicLong;
+
+/**
+ * Stores Abfs operation metrics during each analysis period.
+ */
+class AbfsOperationMetrics {
+
+ /**
+ * No of bytes which could not be transferred due to a failed operation.
+ */
+ private final AtomicLong bytesFailed;
+
+ /**
+ * No of bytes successfully transferred during a successful operation.
+ */
+ private final AtomicLong bytesSuccessful;
+
+ /**
+ * Total no of failed operations.
+ */
+ private final AtomicLong operationsFailed;
+
+ /**
+ * Total no of successful operations.
+ */
+ private final AtomicLong operationsSuccessful;
+
+ /**
+ * Time when collection of metrics ended.
+ */
+ private long endTime;
+
+ /**
+ * Time when the collection of metrics started.
+ */
+ private final long startTime;
+
+ AbfsOperationMetrics(long startTime) {
+ this.startTime = startTime;
+ this.bytesFailed = new AtomicLong();
+ this.bytesSuccessful = new AtomicLong();
+ this.operationsFailed = new AtomicLong();
+ this.operationsSuccessful = new AtomicLong();
+ }
+
+ /**
+ * Gets the counter of bytes that could not be transferred.
+ * @return bytes failed to transfer.
+ */
+ AtomicLong getBytesFailed() {
+ return bytesFailed;
+ }
+
+ /**
+ * Gets the counter of bytes that were transferred successfully.
+ * @return bytes successfully transferred.
+ */
+ AtomicLong getBytesSuccessful() {
+ return bytesSuccessful;
+ }
+
+ /**
+ * Gets the counter of failed operations.
+ * @return no of operations failed.
+ */
+ AtomicLong getOperationsFailed() {
+ return operationsFailed;
+ }
+
+ /**
+ * Gets the counter of successful operations.
+ * @return no of successful operations.
+ */
+ AtomicLong getOperationsSuccessful() {
+ return operationsSuccessful;
+ }
+
+ /**
+ * Gets the time when collection of metrics ended.
+ * @return end time of metric collection.
+ */
+ long getEndTime() {
+ return endTime;
+ }
+
+ /**
+ * Sets the time when collection of metrics ended.
+ * @param endTime end time of metric collection.
+ */
+ void setEndTime(final long endTime) {
+ this.endTime = endTime;
+ }
+
+ /**
+ * Gets the time when collection of metrics started.
+ * @return start time of metric collection.
+ */
+ long getStartTime() {
+ return startTime;
+ }
+
+ void addBytesFailed(long bytes) {
+ this.getBytesFailed().addAndGet(bytes);
+ }
+
+ void addBytesSuccessful(long bytes) {
+ this.getBytesSuccessful().addAndGet(bytes);
+ }
+
+ void incrementOperationsFailed() {
+ this.getOperationsFailed().incrementAndGet();
+ }
+
+ void incrementOperationsSuccessful() {
+ this.getOperationsSuccessful().incrementAndGet();
+ }
+
+}
+
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStream.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStream.java
index 8ef40df29e2cb1..3a986d2cd9a9ce 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStream.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStream.java
@@ -95,6 +95,7 @@ public class AbfsOutputStream extends OutputStream implements Syncable,
private boolean disableOutputStreamFlush;
private boolean enableSmallWriteOptimization;
private boolean isAppendBlob;
+ private boolean isExpectHeaderEnabled;
private volatile IOException lastError;
private long lastFlushOffset;
@@ -169,6 +170,7 @@ public AbfsOutputStream(AbfsOutputStreamContext abfsOutputStreamContext)
this.position = abfsOutputStreamContext.getPosition();
this.closed = false;
this.supportFlush = abfsOutputStreamContext.isEnableFlush();
+ this.isExpectHeaderEnabled = abfsOutputStreamContext.isExpectHeaderEnabled();
this.disableOutputStreamFlush = abfsOutputStreamContext
.isDisableOutputStreamFlush();
this.enableSmallWriteOptimization
@@ -456,7 +458,7 @@ private void uploadBlockAsync(AbfsBlock blockToUpload,
* leaseId - The AbfsLeaseId for this request.
*/
AppendRequestParameters reqParams = new AppendRequestParameters(
- offset, 0, bytesLength, mode, false, leaseId);
+ offset, 0, bytesLength, mode, false, leaseId, isExpectHeaderEnabled);
AbfsRestOperation op;
if (!OperativeEndpoint.isIngressEnabledOnDFS(prefixMode, client.getAbfsConfiguration())) {
try {
@@ -737,7 +739,7 @@ private void writeAppendBlobCurrentBufferToService() throws IOException {
try (AbfsPerfInfo perfInfo = new AbfsPerfInfo(tracker,
"writeCurrentBufferToService", "append")) {
AppendRequestParameters reqParams = new AppendRequestParameters(offset, 0,
- bytesLength, APPEND_MODE, true, leaseId);
+ bytesLength, APPEND_MODE, true, leaseId, isExpectHeaderEnabled);
AbfsRestOperation op = client.append(path, uploadData.toByteArray(), reqParams,
cachedSasToken.get(), new TracingContext(tracingContext));
cachedSasToken.update(op.getSasToken());
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStreamContext.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStreamContext.java
index 67f0b2a4849b77..94a62abbe99812 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStreamContext.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStreamContext.java
@@ -33,6 +33,8 @@ public class AbfsOutputStreamContext extends AbfsStreamContext {
private boolean enableFlush;
+ private boolean enableExpectHeader;
+
private boolean enableSmallWriteOptimization;
private boolean disableOutputStreamFlush;
@@ -80,6 +82,11 @@ public AbfsOutputStreamContext enableFlush(final boolean enableFlush) {
return this;
}
+ public AbfsOutputStreamContext enableExpectHeader(final boolean enableExpectHeader) {
+ this.enableExpectHeader = enableExpectHeader;
+ return this;
+ }
+
public AbfsOutputStreamContext enableSmallWriteOptimization(final boolean enableSmallWriteOptimization) {
this.enableSmallWriteOptimization = enableSmallWriteOptimization;
return this;
@@ -192,6 +199,10 @@ public boolean isEnableFlush() {
return enableFlush;
}
+ public boolean isExpectHeaderEnabled() {
+ return enableExpectHeader;
+ }
+
public boolean isDisableOutputStreamFlush() {
return disableOutputStreamFlush;
}
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java
index 7c957f37cefc07..3585faf7585614 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java
@@ -29,7 +29,6 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import org.apache.hadoop.classification.VisibleForTesting;
import org.apache.hadoop.fs.azurebfs.AbfsStatistic;
import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants;
import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException;
@@ -40,6 +39,7 @@
import org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding;
import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.PUT_BLOCK_LIST;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_CONTINUE;
/**
* The AbfsRestOperation for Rest AbfsClient.
@@ -49,6 +49,8 @@ public class AbfsRestOperation {
private final AbfsRestOperationType operationType;
// Blob FS client, which has the credentials, retry policy, and logs.
private final AbfsClient client;
+ // Throttling intercept instance obtained from the client
+ private final AbfsThrottlingIntercept intercept;
// the HTTP method (PUT, PATCH, POST, GET, HEAD, or DELETE)
private final String method;
// full URL including query parameters
@@ -149,6 +151,7 @@ String getSasToken() {
|| AbfsHttpConstants.HTTP_METHOD_PATCH.equals(method));
this.sasToken = sasToken;
this.abfsCounters = client.getAbfsCounters();
+ this.intercept = client.getIntercept();
}
/**
@@ -230,11 +233,21 @@ private void completeExecute(TracingContext tracingContext)
}
}
- if (result.getStatusCode() >= HttpURLConnection.HTTP_BAD_REQUEST) {
+ int status = result.getStatusCode();
+ /*
+ If even after exhausting all retries, the http status code has an
+ invalid value it qualifies for InvalidAbfsRestOperationException.
+ All http status code less than 1xx range are considered as invalid
+ status codes.
+ */
+ if (status < HTTP_CONTINUE) {
+ throw new InvalidAbfsRestOperationException(null, retryCount);
+ }
+
+ if (status >= HttpURLConnection.HTTP_BAD_REQUEST) {
throw new AbfsRestOperationException(result.getStatusCode(), result.getStorageErrorCode(),
result.getStorageErrorMessage(), null, result);
}
-
LOG.trace("{} REST operation complete", operationType);
}
@@ -278,10 +291,10 @@ public void signRequest(final AbfsHttpOperation httpOperation,
*/
private boolean executeHttpOperation(final int retryCount,
TracingContext tracingContext) throws AzureBlobFileSystemException {
- AbfsHttpOperation httpOperation = null;
+ AbfsHttpOperation httpOperation;
try {
// initialize the HTTP request and open the connection
- httpOperation = createNewHttpOperation();
+ httpOperation = createHttpOperation();
incrementCounter(AbfsStatistic.CONNECTIONS_MADE, 1);
tracingContext.constructHeader(httpOperation);
@@ -296,8 +309,7 @@ private boolean executeHttpOperation(final int retryCount,
// dump the headers
AbfsIoUtils.dumpHeadersToDebugLog("Request Headers",
httpOperation.getConnection().getRequestProperties());
- AbfsClientThrottlingIntercept.sendingRequest(operationType, abfsCounters);
-
+ intercept.sendingRequest(operationType, abfsCounters);
if (hasRequestBody) {
// HttpUrlConnection requires
httpOperation.sendRequest(buffer, bufferOffset, bufferLength);
@@ -323,7 +335,7 @@ private boolean executeHttpOperation(final int retryCount,
LOG.warn("Unknown host name: %s. Retrying to resolve the host name...",
hostname);
if (!client.getRetryPolicy().shouldRetry(retryCount, -1)) {
- throw new InvalidAbfsRestOperationException(ex);
+ throw new InvalidAbfsRestOperationException(ex, retryCount);
}
return false;
} catch (IOException ex) {
@@ -332,12 +344,25 @@ private boolean executeHttpOperation(final int retryCount,
}
if (!client.getRetryPolicy().shouldRetry(retryCount, -1)) {
- throw new InvalidAbfsRestOperationException(ex);
+ throw new InvalidAbfsRestOperationException(ex, retryCount);
}
return false;
} finally {
- AbfsClientThrottlingIntercept.updateMetrics(operationType, httpOperation);
+ int status = httpOperation.getStatusCode();
+ /*
+ A status less than 300 (2xx range) or greater than or equal
+ to 500 (5xx range) should contribute to throttling metrics being updated.
+ Less than 200 or greater than or equal to 500 show failed operations. 2xx
+ range contributes to successful operations. 3xx range is for redirects
+ and 4xx range is for user errors. These should not be a part of
+ throttling backoff computation.
+ */
+ boolean updateMetricsResponseCode = (status < HttpURLConnection.HTTP_MULT_CHOICE
+ || status >= HttpURLConnection.HTTP_INTERNAL_ERROR);
+ if (updateMetricsResponseCode) {
+ intercept.updateMetrics(operationType, httpOperation);
+ }
}
LOG.debug("HttpRequest: {}: {}", operationType, httpOperation.toString());
@@ -351,11 +376,6 @@ private boolean executeHttpOperation(final int retryCount,
return true;
}
- @VisibleForTesting
- AbfsHttpOperation createNewHttpOperation() throws IOException {
- return new AbfsHttpOperation(url, method, requestHeaders);
- }
-
@VisibleForTesting
String getMethod() {
return method;
@@ -366,6 +386,15 @@ void setResult(AbfsHttpOperation result) {
this.result = result;
}
+ /**
+ * Creates new object of {@link AbfsHttpOperation} with the url, method, and
+ * requestHeaders fields of the AbfsRestOperation object.
+ */
+ @VisibleForTesting
+ AbfsHttpOperation createHttpOperation() throws IOException {
+ return new AbfsHttpOperation(url, method, requestHeaders);
+ }
+
/**
* Incrementing Abfs counters with a long value.
*
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperationType.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperationType.java
index 349205e6ba0663..83814d883a6f14 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperationType.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperationType.java
@@ -36,6 +36,7 @@ public enum AbfsRestOperationType {
SetOwner,
SetPathProperties,
SetPermissions,
+ SetBlobMetadata,
Append,
Flush,
ReadFile,
@@ -46,10 +47,12 @@ public enum AbfsRestOperationType {
PutBlob,
GetBlobProperties,
GetContainerProperties,
+ GetBlobMetadata,
PutBlock,
PutBlockList,
GetBlockList,
DeleteBlob,
GetListBlobProperties,
- CopyBlob
+ CopyBlob,
+ GetBlob
}
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsThrottlingIntercept.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsThrottlingIntercept.java
new file mode 100644
index 00000000000000..0ceb4335fcef44
--- /dev/null
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsThrottlingIntercept.java
@@ -0,0 +1,49 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.azurebfs.services;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+
+/**
+ * An interface for Abfs throttling intercepts.
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Unstable
+public interface AbfsThrottlingIntercept {
+
+ /**
+ * Updates the metrics for successful and failed read and write operations.
+ * @param operationType Only applicable for read and write operations.
+ * @param abfsHttpOperation Used for status code and data transferred.
+ */
+ void updateMetrics(AbfsRestOperationType operationType,
+ AbfsHttpOperation abfsHttpOperation);
+
+ /**
+ * Called before the request is sent. Client-side throttling
+ * uses this to suspend the request, if necessary, to minimize errors and
+ * maximize throughput.
+ * @param operationType Only applicable for read and write operations.
+ * @param abfsCounters Used for counters.
+ */
+ void sendingRequest(AbfsRestOperationType operationType,
+ AbfsCounters abfsCounters);
+
+}
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsThrottlingInterceptFactory.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsThrottlingInterceptFactory.java
new file mode 100644
index 00000000000000..0eabe18872d4f9
--- /dev/null
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsThrottlingInterceptFactory.java
@@ -0,0 +1,102 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.azurebfs.services;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.fs.azurebfs.AbfsConfiguration;
+
+import org.apache.hadoop.fs.azurebfs.utils.WeakReferenceMap;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Class to get an instance of throttling intercept class per account.
+ */
+final class AbfsThrottlingInterceptFactory {
+
+ private AbfsThrottlingInterceptFactory() {
+ }
+
+ private static AbfsConfiguration abfsConfig;
+
+ /**
+ * List of references notified of loss.
+ */
+ private static List lostReferences = new ArrayList<>();
+
+ private static final Logger LOG = LoggerFactory.getLogger(
+ AbfsThrottlingInterceptFactory.class);
+
+ /**
+ * Map which stores instance of ThrottlingIntercept class per account.
+ */
+ private static WeakReferenceMap
+ interceptMap = new WeakReferenceMap<>(
+ AbfsThrottlingInterceptFactory::factory,
+ AbfsThrottlingInterceptFactory::referenceLost);
+
+ /**
+ * Returns instance of throttling intercept.
+ * @param accountName Account name.
+ * @return instance of throttling intercept.
+ */
+ private static AbfsClientThrottlingIntercept factory(final String accountName) {
+ return new AbfsClientThrottlingIntercept(accountName, abfsConfig);
+ }
+
+ /**
+ * Reference lost callback.
+ * @param accountName key lost.
+ */
+ private static void referenceLost(String accountName) {
+ lostReferences.add(accountName);
+ }
+
+ /**
+ * Returns an instance of AbfsThrottlingIntercept.
+ *
+ * @param accountName The account for which we need instance of throttling intercept.
+ * @param abfsConfiguration The object of AbfsConfiguration class.
+ * @return Instance of AbfsThrottlingIntercept.
+ */
+ static synchronized AbfsThrottlingIntercept getInstance(String accountName,
+ AbfsConfiguration abfsConfiguration) {
+ abfsConfig = abfsConfiguration;
+ AbfsThrottlingIntercept intercept;
+ if (!abfsConfiguration.isAutoThrottlingEnabled()) {
+ return AbfsNoOpThrottlingIntercept.INSTANCE;
+ }
+ // If account-level throttling is disabled, use a single static instance of the intercept class for all accounts
+ if (!abfsConfiguration.accountThrottlingEnabled()) {
+ intercept = AbfsClientThrottlingIntercept.initializeSingleton(
+ abfsConfiguration);
+ } else {
+ // Return the instance from the map
+ intercept = interceptMap.get(accountName);
+ if (intercept == null) {
+ intercept = new AbfsClientThrottlingIntercept(accountName,
+ abfsConfiguration);
+ interceptMap.put(accountName, intercept);
+ }
+ }
+ return intercept;
+ }
+}
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ExponentialRetryPolicy.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ExponentialRetryPolicy.java
index 218eecaa45a405..dee1d374d4a046 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ExponentialRetryPolicy.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ExponentialRetryPolicy.java
@@ -24,6 +24,8 @@
import org.apache.hadoop.fs.azurebfs.AbfsConfiguration;
import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_CONTINUE;
+
/**
* Retry policy used by AbfsClient.
* */
@@ -138,7 +140,7 @@ public ExponentialRetryPolicy(final int retryCount, final int minBackoff, final
*/
public boolean shouldRetry(final int retryCount, final int statusCode) {
return retryCount < this.retryCount
- && (statusCode == -1
+ && (statusCode < HTTP_CONTINUE
|| statusCode == HttpURLConnection.HTTP_CLIENT_TIMEOUT
|| statusCode == HttpURLConnection.HTTP_GONE
|| statusCode == HTTP_TOO_MANY_REQUESTS
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListBlobConsumer.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListBlobConsumer.java
index cbbc6e30e3e8d7..80017ab558a78e 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListBlobConsumer.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListBlobConsumer.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.fs.azurebfs.services;
+import java.util.List;
+
import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException;
public class ListBlobConsumer {
@@ -28,7 +30,7 @@ public ListBlobConsumer(final ListBlobQueue listBlobQueue) {
this.listBlobQueue = listBlobQueue;
}
- public BlobList consume() throws AzureBlobFileSystemException {
+ public List consume() throws AzureBlobFileSystemException {
if (listBlobQueue.getException() != null) {
throw listBlobQueue.getException();
}
@@ -37,6 +39,6 @@ public BlobList consume() throws AzureBlobFileSystemException {
public Boolean isCompleted() {
return listBlobQueue.getIsCompleted()
- && listBlobQueue.getConsumerLag() == 0;
+ && listBlobQueue.size() == 0;
}
}
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListBlobProducer.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListBlobProducer.java
index c327e8b1e4407b..0d7852c3e0d391 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListBlobProducer.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListBlobProducer.java
@@ -80,20 +80,20 @@ public ListBlobProducer(final String src,
this.nextMarker = initNextMarker;
new Thread(() -> {
do {
- if (listBlobQueue.getConsumerLag() >= client.getAbfsConfiguration()
- .getMaximumConsumerLag()) {
+ int maxResult = listBlobQueue.availableSize();
+ if (maxResult == 0) {
continue;
}
AbfsRestOperation op = null;
try {
- op = client.getListBlobs(nextMarker, src, null, tracingContext);
+ op = client.getListBlobs(nextMarker, src, maxResult, tracingContext);
} catch (AzureBlobFileSystemException ex) {
listBlobQueue.setFailed(ex);
return;
}
BlobList blobList = op.getResult().getBlobList();
nextMarker = blobList.getNextMarker();
- listBlobQueue.enqueue(blobList);
+ listBlobQueue.enqueue(blobList.getBlobPropertyList());
if (nextMarker == null) {
listBlobQueue.complete();
}
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListBlobQueue.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListBlobQueue.java
index 7e8f6d275d44f7..b2a4f2f537407e 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListBlobQueue.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListBlobQueue.java
@@ -19,13 +19,15 @@
package org.apache.hadoop.fs.azurebfs.services;
import java.util.ArrayDeque;
+import java.util.ArrayList;
+import java.util.List;
import java.util.Queue;
import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException;
public class ListBlobQueue {
- private final Queue blobLists = new ArrayDeque<>();
+ private final Queue blobLists;
private int totalProduced = 0;
@@ -42,11 +44,17 @@ public class ListBlobQueue {
*/
private ListBlobProducer producer;
- public ListBlobQueue() {
+ private final int maxSize;
+ private final int maxConsumedBlobCount;
+ public ListBlobQueue(int maxSize, int maxConsumedBlobCount) {
+ blobLists = new ArrayDeque<>(maxSize);
+ this.maxSize = maxSize;
+ this.maxConsumedBlobCount = maxConsumedBlobCount;
}
- public ListBlobQueue(BlobList initBlobList) {
+ public ListBlobQueue(List initBlobList, int maxSize, int maxConsumedBlobCount) {
+ this(maxSize, maxConsumedBlobCount);
if (initBlobList != null) {
enqueue(initBlobList);
}
@@ -74,20 +82,25 @@ AzureBlobFileSystemException getException() {
return failureFromProducer;
}
- public synchronized void enqueue(BlobList blobList) {
- blobLists.add(blobList);
- totalProduced += blobList.getBlobPropertyList().size();
+ public synchronized void enqueue(List blobProperties) { // synchronized: producer thread enqueues while the consumer dequeues
+ blobLists.addAll(blobProperties);
}
- public synchronized BlobList dequeue() {
- BlobList blobList = blobLists.poll();
- if (blobList != null) {
- totalConsumed += blobList.getBlobPropertyList().size();
+ public synchronized List dequeue() {
+ List blobProperties = new ArrayList<>();
+ int counter = 0;
+ while(counter < maxConsumedBlobCount && blobLists.size() > 0) {
+ blobProperties.add(blobLists.poll());
+ counter++;
}
- return blobList;
+ return blobProperties;
}
- public synchronized int getConsumerLag() {
- return totalProduced - totalConsumed;
+ public synchronized int size() {
+ return blobLists.size();
+ }
+
+ public synchronized int availableSize() {
+ return maxSize - blobLists.size();
}
}
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/OperativeEndpoint.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/OperativeEndpoint.java
index c777ad83e6f669..83fa2124141e07 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/OperativeEndpoint.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/OperativeEndpoint.java
@@ -23,19 +23,26 @@
* This class is mainly to unify the fallback for all API's to DFS endpoint at a single spot.
*/
public class OperativeEndpoint {
- public static boolean isMkdirEnabledOnDFS(PrefixMode mode, AbfsConfiguration abfsConfiguration) {
+ public static boolean isMkdirEnabledOnDFS(PrefixMode mode, AbfsConfiguration abfsConfiguration) {
+ if (mode == PrefixMode.BLOB) {
+ return abfsConfiguration.shouldMkdirFallbackToDfs();
+ } else {
+ return true;
+ }
+ }
+
+ public static boolean isIngressEnabledOnDFS(PrefixMode mode, AbfsConfiguration abfsConfiguration) {
if (mode == PrefixMode.BLOB) {
- return abfsConfiguration.shouldMkdirFallbackToDfs();
+ return abfsConfiguration.shouldIngressFallbackToDfs();
} else {
return true;
}
}
- public static boolean isIngressEnabledOnDFS(PrefixMode mode, AbfsConfiguration abfsConfiguration) {
+ public static boolean isReadEnabledOnDFS(PrefixMode mode, AbfsConfiguration abfsConfiguration) {
if (mode == PrefixMode.BLOB) {
- return abfsConfiguration.shouldIngressFallbackToDfs();
- } else {
- return true;
+ return abfsConfiguration.shouldReadFallbackToDfs();
}
+ return true;
}
}
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/TimerFunctionality.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/TimerFunctionality.java
new file mode 100644
index 00000000000000..52428fdd54a19f
--- /dev/null
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/TimerFunctionality.java
@@ -0,0 +1,25 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.azurebfs.services;
+
+public enum TimerFunctionality {
+ RESUME,
+
+ SUSPEND
+}
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingContext.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingContext.java
index 613274b6d0374f..241232ed917dcb 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingContext.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingContext.java
@@ -140,6 +140,10 @@ public void setOperation(FSOperationType operation) {
this.opType = operation;
}
+  /**
+   * Returns the retry count recorded on this tracing context.
+   *
+   * @return the current value of the {@code retryCount} field
+   */
+  public int getRetryCount() {
+    return this.retryCount;
+  }
+
public void setRetryCount(int retryCount) {
this.retryCount = retryCount;
}
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/UriUtils.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/UriUtils.java
index e27d54b443ca20..857b20b2fd80bd 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/UriUtils.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/UriUtils.java
@@ -169,6 +169,20 @@ public static String getMaskedUrl(URL url) {
return url.toString().replace(queryString, maskedQueryString);
}
+  /**
+   * URL-encodes a metadata attribute value using UTF-8.
+   *
+   * Metadata properties are sent in the HTTP header of the request, which is
+   * in turn used on the server side to authorize the request. URI special
+   * characters left unencoded there result in 403 errors from the server.
+   *
+   * @param value attribute value to encode; may be null
+   * @return the URL-encoded value, or null when {@code value} is null
+   * @throws UnsupportedEncodingException declared by the underlying
+   *         {@code URLEncoder.encode(String, String)} call
+   */
+  public static String encodeMetadataAttribute(String value) throws UnsupportedEncodingException {
+    if (value == null) {
+      return null;
+    }
+    final String charsetName = StandardCharsets.UTF_8.name();
+    return URLEncoder.encode(value, charsetName);
+  }
+
+  /**
+   * URL-decodes a metadata attribute value using UTF-8; the counterpart of
+   * {@code encodeMetadataAttribute}.
+   *
+   * @param encoded URL-encoded attribute value; may be null
+   * @return the decoded value, or null when {@code encoded} is null
+   * @throws UnsupportedEncodingException declared by the underlying
+   *         {@code URLDecoder.decode(String, String)} call
+   */
+  public static String decodeMetadataAttribute(String encoded) throws UnsupportedEncodingException {
+    if (encoded == null) {
+      return null;
+    }
+    final String charsetName = StandardCharsets.UTF_8.name();
+    return java.net.URLDecoder.decode(encoded, charsetName);
+  }
+
private UriUtils() {
}
}
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/WeakReferenceMap.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/WeakReferenceMap.java
new file mode 100644
index 00000000000000..cd47809689c416
--- /dev/null
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/WeakReferenceMap.java
@@ -0,0 +1,333 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.azurebfs.utils;
+
+import java.lang.ref.WeakReference;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.function.Consumer;
+import java.util.function.Function;
+
+import javax.annotation.Nullable;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.fs.store.LogExactlyOnce;
+
+import static java.util.Objects.requireNonNull;
+
+/**
+ * A map of keys type K to objects of type V which uses weak references,
+ * so does lot leak memory through long-lived references
+ * at the expense of losing references when GC takes place..
+ *
+ * This class is intended be used instead of ThreadLocal storage when
+ * references are to be cleaned up when the instance holding.
+ * In this use case, the key is the Long key.
+ *
+ * Concurrency.
+ * The class assumes that map entries are rarely contended for when writing,
+ * and that not blocking other threads is more important than atomicity.
+ * - a ConcurrentHashMap is used to map keys to weak references, with
+ * all its guarantees.
+ * - there is no automatic pruning.
+ * - see {@link #create(Object)} for the concurrency semantics on entry creation.
+ */
+@InterfaceAudience.Private
+public class WeakReferenceMap {
+
+ private static final Logger LOG =
+ LoggerFactory.getLogger(WeakReferenceMap.class);
+
+ /**
+ * The reference map.
+ */
+ private final Map> map = new ConcurrentHashMap<>();
+
+ /**
+ * Supplier of new instances.
+ */
+ private final Function super K, ? extends V> factory;
+
+ /**
+ * Nullable callback when a get on a key got a weak reference back.
+ * The assumption is that this is for logging/stats, which is why
+ * no attempt is made to use the call as a supplier of a new value.
+ */
+ private final Consumer super K> referenceLost;
+
+ /**
+ * Counter of references lost.
+ */
+ private final AtomicLong referenceLostCount = new AtomicLong();
+
+ /**
+ * Counter of entries created.
+ */
+ private final AtomicLong entriesCreatedCount = new AtomicLong();
+
+ /**
+ * Log to report loss of a reference during the create phase, which
+ * is believed to be a cause of HADOOP-18456.
+ */
+ private final LogExactlyOnce referenceLostDuringCreation = new LogExactlyOnce(LOG);
+
+ /**
+ * instantiate.
+ * @param factory supplier of new instances
+ * @param referenceLost optional callback on lost references.
+ */
+ public WeakReferenceMap(
+ Function super K, ? extends V> factory,
+ @Nullable final Consumer super K> referenceLost) {
+
+ this.factory = requireNonNull(factory);
+ this.referenceLost = referenceLost;
+ }
+
+ @Override
+ public String toString() {
+ return "WeakReferenceMap{" +
+ "size=" + size() +
+ ", referenceLostCount=" + referenceLostCount +
+ ", entriesCreatedCount=" + entriesCreatedCount +
+ '}';
+ }
+
+ /**
+ * Map size.
+ * @return the current map size.
+ */
+ public int size() {
+ return map.size();
+ }
+
+ /**
+ * Clear all entries.
+ */
+ public void clear() {
+ map.clear();
+ }
+
+ /**
+ * look up the value, returning the possibly empty weak reference
+ * to a value, or null if no value was found.
+ * @param key key to look up
+ * @return null if there is no entry, a weak reference if found
+ */
+ public WeakReference lookup(K key) {
+ return map.get(key);
+ }
+
+ /**
+ * Get the value, creating if needed.
+ * @param key key.
+ * @return an instance.
+ */
+ public V get(K key) {
+ final WeakReference currentWeakRef = lookup(key);
+ // resolve it, after which if not null, we have a strong reference
+ V strongVal = resolve(currentWeakRef);
+ if (strongVal != null) {
+ // all good.
+ return strongVal;
+ }
+
+ // here, either currentWeakRef was null, or its reference was GC'd.
+ if (currentWeakRef != null) {
+ // garbage collection removed the reference.
+
+ // explicitly remove the weak ref from the map if it has not
+ // been updated by this point
+ // this is here just for completeness.
+ map.remove(key, currentWeakRef);
+
+ // log/report the loss.
+ noteLost(key);
+ }
+
+ // create a new value and add it to the map
+ return create(key);
+ }
+
+ /**
+ * Create a new instance under a key.
+ *
+ * The instance is created, added to the map and then the
+ * map value retrieved.
+ * This ensures that the reference returned is that in the map,
+ * even if there is more than one entry being created at the same time.
+ * If that race does occur, it will be logged the first time it happens
+ * for this specific map instance.
+ *
+ * HADOOP-18456 highlighted the risk of a concurrent GC resulting a null
+ * value being retrieved and so returned.
+ * To prevent this:
+ *
+ *
A strong reference is retained to the newly created instance
+ * in a local variable.
+ *
That variable is used after the resolution process, to ensure
+ * the JVM doesn't consider it "unreachable" and so eligible for GC.
+ *
A check is made for the resolved reference being null, and if so,
+ * the put() is repeated
+ *
+ * @param key key
+ * @return the created value
+ */
+ public V create(K key) {
+ entriesCreatedCount.incrementAndGet();
+ /*
+ Get a strong ref so even if a GC happens in this method the reference is not lost.
+ It is NOT enough to have a reference in a field, it MUST be used
+ so as to ensure the reference isn't optimized away prematurely.
+ "A reachable object is any object that can be accessed in any potential continuing
+ computation from any live thread."
+ */
+
+ final V strongRef = requireNonNull(factory.apply(key),
+ "factory returned a null instance");
+ V resolvedStrongRef;
+ do {
+ WeakReference newWeakRef = new WeakReference<>(strongRef);
+
+ // put it in the map
+ map.put(key, newWeakRef);
+
+ // get it back from the map
+ WeakReference retrievedWeakRef = map.get(key);
+ // resolve that reference, handling the situation where somehow it was removed from the map
+ // between the put() and the get()
+ resolvedStrongRef = resolve(retrievedWeakRef);
+ if (resolvedStrongRef == null) {
+ referenceLostDuringCreation.warn("reference to %s lost during creation", key);
+ noteLost(key);
+ }
+ } while (resolvedStrongRef == null);
+
+ // note if there was any change in the reference.
+ // as this forces strongRef to be kept in scope
+ if (strongRef != resolvedStrongRef) {
+ LOG.debug("Created instance for key {}: {} overwritten by {}",
+ key, strongRef, resolvedStrongRef);
+ }
+
+ return resolvedStrongRef;
+ }
+
+ /**
+ * Put a value under the key.
+ * A null value can be put, though on a get() call
+ * a new entry is generated
+ *
+ * @param key key
+ * @param value value
+ * @return any old non-null reference.
+ */
+ public V put(K key, V value) {
+ return resolve(map.put(key, new WeakReference<>(value)));
+ }
+
+ /**
+ * Remove any value under the key.
+ * @param key key
+ * @return any old non-null reference.
+ */
+ public V remove(K key) {
+ return resolve(map.remove(key));
+ }
+
+ /**
+ * Does the map have a valid reference for this object?
+ * no-side effects: there's no attempt to notify or cleanup
+ * if the reference is null.
+ * @param key key to look up
+ * @return true if there is a valid reference.
+ */
+ public boolean containsKey(K key) {
+ final WeakReference current = lookup(key);
+ return resolve(current) != null;
+ }
+
+ /**
+ * Given a possibly null weak reference, resolve
+ * its value.
+ * @param r reference to resolve
+ * @return the value or null
+ */
+ protected V resolve(WeakReference r) {
+ return r == null ? null : r.get();
+ }
+
+ /**
+ * Prune all null weak references, calling the referenceLost
+ * callback for each one.
+ *
+ * non-atomic and non-blocking.
+ * @return the number of entries pruned.
+ */
+ public int prune() {
+ int count = 0;
+ final Iterator>> it = map.entrySet().iterator();
+ while (it.hasNext()) {
+ final Map.Entry> next = it.next();
+ if (next.getValue().get() == null) {
+ it.remove();
+ count++;
+ noteLost(next.getKey());
+ }
+ }
+ return count;
+ }
+
+ /**
+ * Notify the reference lost callback.
+ * @param key key of lost reference
+ */
+ private void noteLost(final K key) {
+ // increment local counter
+ referenceLostCount.incrementAndGet();
+
+ // and call any notification function supplied in the constructor
+ if (referenceLost != null) {
+ referenceLost.accept(key);
+ }
+ }
+
+ /**
+ * Get count of references lost as detected
+ * during prune() or get() calls.
+ * @return count of references lost
+ */
+ public final long getReferenceLostCount() {
+ return referenceLostCount.get();
+ }
+
+ /**
+ * Get count of entries created on demand.
+ * @return count of entries created
+ */
+ public final long getEntriesCreatedCount() {
+ return entriesCreatedCount.get();
+ }
+}
+
diff --git a/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md b/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md
index dfb7f3f42a5cf9..ac77765f9e0b52 100644
--- a/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md
+++ b/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md
@@ -769,6 +769,26 @@ Hflush() being the only documented API that can provide persistent data
transfer, Flush() also attempting to persist buffered data will lead to
performance issues.
+### Hundred Continue Options
+
+`fs.azure.account.expect.header.enabled`: This configuration parameter
+specifies whether to send an `Expect: 100-continue` header with each
+append request. It is set to true by default. This flag configures
+the client to check with the Azure store before uploading a block of data from
+an output stream. This allows the client to throttle back gracefully before
+actually attempting to upload the block. In experiments this provides
+significant throughput improvements under heavy load. For more information:
+- https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Expect
+
+
+### Account level throttling Options
+
+`fs.azure.account.operation.idle.timeout`: This value specifies the time after which the timer for the analyzer (read or
+write) is paused; it stays paused until a new request is made. The default value is 60 seconds.
+
### HNS Check Options
Config `fs.azure.account.hns.enabled` provides an option to specify whether
the storage account is HNS enabled or not. In case the config is not provided,
@@ -874,6 +894,9 @@ when there are too many writes from the same process.
time. Effectively this will be the threadpool size within the
AbfsOutputStream instance. Set the value in between 1 to 8 both inclusive.
+`fs.azure.analysis.period`: The interval after which the sleep duration is recomputed from the analyzed metrics. The default value
+is 10 seconds.
+
`fs.azure.write.max.requests.to.queue`: To set the maximum write requests
that can be queued. Memory consumption of AbfsOutputStream instance can be
tuned with this config considering each queued request holds a buffer. Set
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java
index 932b7638a48481..cca2a94add2bc5 100644
--- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java
+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java
@@ -25,6 +25,7 @@
import java.util.UUID;
import java.util.concurrent.Callable;
+import org.apache.hadoop.fs.azurebfs.oauth2.AccessTokenProvider;
import org.apache.hadoop.fs.azurebfs.services.AbfsClient;
import org.apache.hadoop.fs.azurebfs.services.PrefixMode;
import org.junit.After;
@@ -42,6 +43,7 @@
import org.apache.hadoop.fs.azurebfs.security.AbfsDelegationTokenManager;
import org.apache.hadoop.fs.azurebfs.services.AbfsOutputStream;
import org.apache.hadoop.fs.azurebfs.services.AuthType;
+import org.apache.hadoop.fs.azurebfs.services.ITestAbfsClient;
import org.apache.hadoop.fs.azure.AzureNativeFileSystemStore;
import org.apache.hadoop.fs.azure.NativeAzureFileSystem;
import org.apache.hadoop.fs.azure.metrics.AzureFileSystemInstrumentation;
@@ -73,7 +75,7 @@ public abstract class AbstractAbfsIntegrationTest extends
AbstractAbfsTestWithTimeout {
private static final Logger LOG =
- LoggerFactory.getLogger(AbstractAbfsIntegrationTest.class);
+ LoggerFactory.getLogger(AbstractAbfsIntegrationTest.class);
private boolean isIPAddress;
private NativeAzureFileSystem wasb;
@@ -110,7 +112,7 @@ protected AbstractAbfsIntegrationTest() throws Exception {
if (authType == AuthType.SharedKey) {
assumeTrue("Not set: " + FS_AZURE_ACCOUNT_KEY,
- abfsConfig.get(FS_AZURE_ACCOUNT_KEY) != null);
+ abfsConfig.get(FS_AZURE_ACCOUNT_KEY) != null);
// Update credentials
} else {
assumeTrue("Not set: " + FS_AZURE_ACCOUNT_TOKEN_PROVIDER_TYPE_PROPERTY_NAME,
@@ -243,6 +245,9 @@ public Hashtable call() throws Exception {
}
}
+  /**
+   * Looks up the access token provider behind a filesystem's ABFS client.
+   *
+   * @param fs filesystem whose store's client is queried
+   * @return the result of {@code ITestAbfsClient.getAccessTokenProvider} for that client
+   */
+  public AccessTokenProvider getAccessTokenProvider(final AzureBlobFileSystem fs) {
+    final AbfsClient client = fs.getAbfsStore().getClient();
+    return ITestAbfsClient.getAccessTokenProvider(client);
+  }
public void loadConfiguredFileSystem() throws Exception {
// disable auto-creation of filesystem
@@ -506,4 +511,30 @@ protected long assertAbfsStatistics(AbfsStatistic statistic,
(long) metricMap.get(statistic.getStatName()));
return expectedValue;
}
+
+ /**
+ * For creating directory with implicit parents. Doesn't change already explicit
+ * parents.
+ */
+ void createAzCopyDirectory(Path path) throws Exception {
+ AzcopyHelper azcopyHelper = new AzcopyHelper(
+ getAccountName(), getFileSystemName(), getFileSystem().getAbfsStore()
+ .getAbfsConfiguration()
+ .getRawConfiguration(), getFileSystem().getAbfsStore().getPrefixMode());
+ azcopyHelper.createFolderUsingAzcopy(
+ getFileSystem().makeQualified(path).toUri().getPath().substring(1));
+ }
+
+ /**
+ * For creating files with implicit parents. Doesn't change already explicit
+ * parents.
+ */
+ void createAzCopyFile(Path path) throws Exception {
+ AzcopyHelper azcopyHelper = new AzcopyHelper(getAccountName(),
+ getFileSystemName(), getFileSystem().getAbfsStore()
+ .getAbfsConfiguration()
+ .getRawConfiguration(), getFileSystem().getAbfsStore().getPrefixMode());
+ azcopyHelper.createFileUsingAzcopy(
+ getFileSystem().makeQualified(path).toUri().getPath().substring(1));
+ }
}
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/BlobDirectoryStateHelper.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/BlobDirectoryStateHelper.java
index e4fa38e365ec32..dad4551f65dbdb 100644
--- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/BlobDirectoryStateHelper.java
+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/BlobDirectoryStateHelper.java
@@ -18,7 +18,6 @@
package org.apache.hadoop.fs.azurebfs;
-import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.HttpURLConnection;
import java.util.List;
@@ -29,7 +28,6 @@
import org.mockito.Mockito;
-import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem;
import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException;
import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException;
import org.apache.hadoop.fs.azurebfs.services.BlobProperty;
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsRestOperationException.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsRestOperationException.java
index 98ce66d4cbba6a..b45940f1549ca3 100644
--- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsRestOperationException.java
+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsRestOperationException.java
@@ -18,6 +18,7 @@
package org.apache.hadoop.fs.azurebfs;
+import java.io.FileNotFoundException;
import java.io.IOException;
import org.assertj.core.api.Assertions;
@@ -31,6 +32,8 @@
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.azurebfs.services.PrefixMode;
+
import org.mockito.Mockito;
import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.DOT;
@@ -63,7 +66,12 @@ public void testAbfsRestOperationExceptionFormat() throws IOException {
Assert.assertEquals(4, errorFields.length);
// Check status message, status code, HTTP Request Type and URL.
- Assert.assertEquals("Operation failed: \"The specified path does not exist.\"", errorFields[0].trim());
+ if (fs.getAbfsStore().getPrefixMode() == PrefixMode.BLOB) {
+ Assert.assertEquals("Operation failed: \"The specified blob does not exist.\"", errorFields[0].trim());
+ }
+ else {
+ Assert.assertEquals("Operation failed: \"The specified path does not exist.\"", errorFields[0].trim());
+ }
Assert.assertEquals("404", errorFields[1].trim());
Assert.assertEquals("HEAD", errorFields[2].trim());
Assert.assertTrue(errorFields[3].trim().startsWith("http"));
@@ -79,7 +87,12 @@ public void testAbfsRestOperationExceptionFormat() throws IOException {
if (!getAbfsStore(fs).getAbfsConfiguration().enableAbfsListIterator()) {
Assert.assertEquals(6, errorFields.length);
// Check status message, status code, HTTP Request Type and URL.
- Assert.assertEquals("Operation failed: \"The specified path does not exist.\"", errorFields[0].trim());
+ if (fs.getAbfsStore().getPrefixMode() == PrefixMode.BLOB) {
+ Assert.assertEquals("Operation failed: \"The specified blob does not exist.\"", errorFields[0].trim());
+ }
+ else {
+ Assert.assertEquals("Operation failed: \"The specified path does not exist.\"", errorFields[0].trim());
+ }
Assert.assertEquals("404", errorFields[1].trim());
Assert.assertEquals("GET", errorFields[2].trim());
Assert.assertTrue(errorFields[3].trim().startsWith("http"));
@@ -90,7 +103,12 @@ public void testAbfsRestOperationExceptionFormat() throws IOException {
} else {
Assert.assertEquals(4, errorFields.length);
// Check status message, status code, HTTP Request Type and URL.
- Assert.assertEquals("Operation failed: \"The specified path does not exist.\"", errorFields[0].trim());
+ if (fs.getAbfsStore().getPrefixMode() == PrefixMode.BLOB) {
+ Assert.assertEquals("Operation failed: \"The specified blob does not exist.\"", errorFields[0].trim());
+ }
+ else {
+ Assert.assertEquals("Operation failed: \"The specified path does not exist.\"", errorFields[0].trim());
+ }
Assert.assertEquals("404", errorFields[1].trim());
Assert.assertEquals("HEAD", errorFields[2].trim());
Assert.assertTrue(errorFields[3].trim().startsWith("http"));
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemAppend.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemAppend.java
index fd4f8d4622e075..a6202f54378d4f 100644
--- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemAppend.java
+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemAppend.java
@@ -33,12 +33,13 @@
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException;
import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException;
import org.apache.hadoop.fs.azurebfs.services.AbfsClient;
import org.apache.hadoop.fs.azurebfs.services.AbfsOutputStream;
import org.apache.hadoop.fs.azurebfs.services.OperativeEndpoint;
import org.apache.hadoop.fs.azurebfs.services.PrefixMode;
-import org.apache.hadoop.fs.azurebfs.services.TestAbfsClient;
+import org.apache.hadoop.fs.azurebfs.services.ITestAbfsClient;
import org.apache.hadoop.fs.azurebfs.utils.TracingContext;
import org.junit.Assume;
import org.junit.Test;
@@ -242,6 +243,19 @@ public void testCreateEmptyBlob() throws IOException {
Mockito.any(TracingContext.class));
}
+  /**
+   * In BLOB prefix mode, appending to a directory created through AzCopy
+   * (for which no blob properties can be fetched) must fail with
+   * FileNotFoundException.
+   */
+  @Test
+  public void testAppendImplicitDirectoryAzcopy() throws Exception {
+    final AzureBlobFileSystem fs = getFileSystem();
+    Assume.assumeTrue(fs.getAbfsStore().getPrefixMode() == PrefixMode.BLOB);
+
+    final Path srcDir = new Path("/src");
+    createAzCopyDirectory(srcDir);
+    createAzCopyFile(new Path("/src/file"));
+
+    // There is no blob at the directory path itself, so the property lookup fails.
+    intercept(AbfsRestOperationException.class, () -> {
+      fs.getAbfsStore().getBlobProperty(srcDir, Mockito.mock(TracingContext.class));
+    });
+    intercept(FileNotFoundException.class, () -> fs.append(srcDir));
+  }
+
/**
* Verify that no calls to getBlockList were made.
*/
@@ -252,7 +266,7 @@ public void testCreateNonEmptyBlob() throws IOException {
AzureBlobFileSystemStore store = Mockito.spy(fs.getAbfsStore());
Mockito.doReturn(store).when(fs).getAbfsStore();
AbfsClient client = store.getClient();
- AbfsClient testClient = Mockito.spy(TestAbfsClient.createTestClientFromCurrentContext(
+ AbfsClient testClient = Mockito.spy(ITestAbfsClient.createTestClientFromCurrentContext(
client,
fs.getAbfsStore().getAbfsConfiguration()));
store.setClient(testClient);
@@ -277,7 +291,7 @@ public void testValidateGetBlockList() throws Exception {
AzureBlobFileSystemStore store = Mockito.spy(fs.getAbfsStore());
Mockito.doReturn(store).when(fs).getAbfsStore();
AbfsClient client = store.getClient();
- AbfsClient testClient = Mockito.spy(TestAbfsClient.createTestClientFromCurrentContext(
+ AbfsClient testClient = Mockito.spy(ITestAbfsClient.createTestClientFromCurrentContext(
client,
fs.getAbfsStore().getAbfsConfiguration()));
store.setClient(testClient);
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemAttributes.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemAttributes.java
index beb7d0ebaaa8ea..e1eb315bc73bed 100644
--- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemAttributes.java
+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemAttributes.java
@@ -19,6 +19,8 @@
package org.apache.hadoop.fs.azurebfs;
import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
import java.util.EnumSet;
import org.junit.Assume;
@@ -26,8 +28,7 @@
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.XAttrSetFlag;
-import org.apache.hadoop.fs.azurebfs.constants.FSOperationType;
-import org.apache.hadoop.fs.azurebfs.utils.TracingHeaderValidator;
+import org.apache.hadoop.fs.azurebfs.services.PrefixMode;
import static org.apache.hadoop.test.LambdaTestUtils.intercept;
@@ -42,70 +43,218 @@ public ITestAzureBlobFileSystemAttributes() throws Exception {
super();
}
+ /**
+ * Test that attribute values written with SetXAttr() are returned unchanged by GetXAttr().
+ * DFS does not support Unicode characters in user-defined metadata properties.
+ * Blob Endpoint supports Unicode encoded in UTF_8 character encoding.
+ * Both endpoints are currently exercised with ASCII values only; see the TODO in the test body.
+ * @throws Exception
+ */
@Test
- public void testSetGetXAttr() throws Exception {
+ public void testGetSetXAttr() throws Exception {
AzureBlobFileSystem fs = getFileSystem();
- AbfsConfiguration conf = fs.getAbfsStore().getAbfsConfiguration();
- Assume.assumeTrue(getIsNamespaceEnabled(fs));
-
- byte[] attributeValue1 = fs.getAbfsStore().encodeAttribute("hi");
- byte[] attributeValue2 = fs.getAbfsStore().encodeAttribute("ä½ å¥½");
- String attributeName1 = "user.asciiAttribute";
- String attributeName2 = "user.unicodeAttribute";
- Path testFile = path("setGetXAttr");
-
- // after creating a file, the xAttr should not be present
- touch(testFile);
- assertNull(fs.getXAttr(testFile, attributeName1));
-
- // after setting the xAttr on the file, the value should be retrievable
- fs.registerListener(
- new TracingHeaderValidator(conf.getClientCorrelationId(),
- fs.getFileSystemId(), FSOperationType.SET_ATTR, true, 0));
- fs.setXAttr(testFile, attributeName1, attributeValue1);
- fs.setListenerOperation(FSOperationType.GET_ATTR);
- assertArrayEquals(attributeValue1, fs.getXAttr(testFile, attributeName1));
- fs.registerListener(null);
-
- // after setting a second xAttr on the file, the first xAttr values should not be overwritten
- fs.setXAttr(testFile, attributeName2, attributeValue2);
- assertArrayEquals(attributeValue1, fs.getXAttr(testFile, attributeName1));
- assertArrayEquals(attributeValue2, fs.getXAttr(testFile, attributeName2));
+ final Path path = new Path("a/b");
+ fs.create(path);
+
+ String attributeName1 = "user.attribute1";
+ String attributeName2 = "user.attribute2";
+ String decodedAttributeValue1;
+ String decodedAttributeValue2;
+ byte[] attributeValue1;
+ byte[] attributeValue2;
+
+ if(fs.getAbfsStore().getPrefixMode() == PrefixMode.BLOB) {
+ Assume.assumeTrue(!getIsNamespaceEnabled(fs)); // Blob endpoint Currently Supports FNS only
+ decodedAttributeValue1 = "hi";
+ decodedAttributeValue2 = "hello"; //Блюз //ä½ å¥½
+ // TODO: Modify them to unicode characters when support is added
+ attributeValue1 = fs.getAbfsStore().encodeAttribute(decodedAttributeValue1);
+ attributeValue2 = fs.getAbfsStore().encodeAttribute(decodedAttributeValue2);
+ }
+ else {
+ decodedAttributeValue1 = "hi";
+ decodedAttributeValue2 = "hello"; // DFS Endpoint only Supports ASCII
+ attributeValue1 = fs.getAbfsStore().encodeAttribute(decodedAttributeValue1);
+ attributeValue2 = fs.getAbfsStore().encodeAttribute(decodedAttributeValue2);
+ }
+
+ // Attribute not present initially
+ assertNull(fs.getXAttr(path, attributeName1));
+ assertNull(fs.getXAttr(path, attributeName2));
+
+ // Set the Attributes
+ fs.setXAttr(path, attributeName1, attributeValue1);
+
+ // Check if the attribute is retrievable
+ byte[] rv = fs.getXAttr(path, attributeName1);
+ assertTrue(Arrays.equals(rv, attributeValue1));
+ assertEquals(new String(rv, StandardCharsets.UTF_8), decodedAttributeValue1);
+
+ // Set the second Attribute
+ fs.setXAttr(path, attributeName2, attributeValue2);
+
+ // Check all the attributes present and previous Attribute not overridden
+ rv = fs.getXAttr(path, attributeName1);
+ assertTrue(Arrays.equals(rv, attributeValue1));
+ assertEquals(new String(rv, StandardCharsets.UTF_8), decodedAttributeValue1);
+ rv = fs.getXAttr(path, attributeName2);
+ assertTrue(Arrays.equals(rv, attributeValue2));
+ assertEquals(new String(rv, StandardCharsets.UTF_8), decodedAttributeValue2);
+ }
+
+  /**
+   * Reading an attribute from a directory created through AzCopy, which the
+   * test first asserts to be implicit, must return null.
+   */
+  @Test
+  public void testGetXAttrOnImplicitPath() throws Exception {
+    final AzureBlobFileSystem fs = getFileSystem();
+    final PrefixMode prefixMode = fs.getAbfsStore().getPrefixMode();
+    final AzcopyHelper helper = new AzcopyHelper(
+        getAccountName(), getFileSystemName(), getRawConfiguration(), prefixMode);
+
+    final Path implicitDir = new Path("a/b");
+    // substring(1) drops the first character of the qualified URI path.
+    final String azcopyTarget = fs.makeQualified(implicitDir).toUri().getPath().substring(1);
+    helper.createFolderUsingAzcopy(azcopyTarget);
+
+    final boolean implicit = BlobDirectoryStateHelper.isImplicitDirectory(implicitDir, fs);
+    assertTrue("Path is implicit.", implicit);
+
+    assertNull(fs.getXAttr(implicitDir, "user.attribute1"));
+  }
+
+ /**
+ * Trying to set same attribute multiple times should result in no failure
+ * @throws Exception
+ */
+ @Test
+ public void testSetXAttrMultipleOperations() throws Exception {
+ AzureBlobFileSystem fs = getFileSystem();
+ final Path path = new Path("a/b");
+ fs.create(path);
+
+ String attributeName1 = "user.attribute1";
+ byte[] attributeValue1;
+ String decodedAttributeValue1 = "hi";
+
+ if(fs.getAbfsStore().getPrefixMode() == PrefixMode.BLOB) {
+ Assume.assumeTrue(!getIsNamespaceEnabled(fs));
+ // TODO: Modify them to unicode characters when support is added
+ attributeValue1 = fs.getAbfsStore().encodeAttribute("hi");
+ }
+ else {
+ attributeValue1 = fs.getAbfsStore().encodeAttribute("hi");
+ }
+
+ // Attribute not present initially
+ assertNull(fs.getXAttr(path, attributeName1));
+
+ // Set the Attributes Multiple times
+ // Filesystem internally adds create and replace flags
+ fs.setXAttr(path, attributeName1, attributeValue1);
+ fs.setXAttr(path, attributeName1, attributeValue1);
+
+ // Check if the attribute is retrievable
+ byte[] rv = fs.getXAttr(path, attributeName1);
+ assertTrue(Arrays.equals(rv, attributeValue1));
+ assertEquals(new String(rv, StandardCharsets.UTF_8), decodedAttributeValue1);
+ }
+
+ /**
+ * Test that setting metadata over marker blob do not override
+ * x-ms-meta-hdi_IsFolder
+ * TODO: Confirm Expected Behavior
+ * @throws Exception
+ */
+ @Test
+ public void testSetXAttrOverMarkerBlob() throws Exception {
+ AzureBlobFileSystem fs = getFileSystem();
+ final Path path = new Path("a/b");
+ fs.mkdirs(path);
+
+ assertTrue(BlobDirectoryStateHelper.isExplicitDirectory(path, fs));
+
+ String attributeName1 = "user.attribute1";
+ byte[] attributeValue1;
+
+ if(fs.getAbfsStore().getPrefixMode() == PrefixMode.BLOB) {
+ Assume.assumeTrue(!getIsNamespaceEnabled(fs));
+ // TODO: Modify them to unicode characters when support is added
+ attributeValue1 = fs.getAbfsStore().encodeAttribute("hi");
+ }
+ else {
+ attributeValue1 = fs.getAbfsStore().encodeAttribute("hi");
+ }
+
+ // Attribute not present initially
+ assertNull(fs.getXAttr(path, attributeName1));
+
+ // Set the Attribute on marker blob
+ fs.setXAttr(path, attributeName1, attributeValue1);
+
+ // Check if the attribute is retrievable
+ byte[] rv = fs.getXAttr(path, attributeName1);
+ assertTrue(Arrays.equals(rv, attributeValue1));
+
+ // Check if Marker blob still exists as marker.
+ assertTrue(BlobDirectoryStateHelper.isExplicitDirectory(path, fs));
}
@Test
public void testSetGetXAttrCreateReplace() throws Exception {
AzureBlobFileSystem fs = getFileSystem();
- Assume.assumeTrue(getIsNamespaceEnabled(fs));
- byte[] attributeValue = fs.getAbfsStore().encodeAttribute("one");
- String attributeName = "user.someAttribute";
- Path testFile = path("createReplaceXAttr");
+ final Path testFile = new Path("a/b");
+
+ String attributeName = "user.attribute1";
+ // The decoded value is the same for both endpoints; only the encoding differs.
+ String decodedAttributeValue1 = "hi";
+ byte[] attributeValue;
+
+ if (fs.getAbfsStore().getPrefixMode() == PrefixMode.BLOB) {
+ Assume.assumeTrue(!getIsNamespaceEnabled(fs)); // Blob endpoint Currently Supports FNS only
+ attributeValue = decodedAttributeValue1.getBytes(StandardCharsets.UTF_8);
+ }
+ else {
+ attributeValue = fs.getAbfsStore().encodeAttribute(decodedAttributeValue1);
+ }
// after creating a file, it must be possible to create a new xAttr
- touch(testFile);
+ // close the stream returned by create() so the output stream is not leaked
+ fs.create(testFile).close();
fs.setXAttr(testFile, attributeName, attributeValue, CREATE_FLAG);
assertArrayEquals(attributeValue, fs.getXAttr(testFile, attributeName));
- // however after the xAttr is created, creating it again must fail
+ // however, after the xAttr is created, creating it again must fail
intercept(IOException.class, () -> fs.setXAttr(testFile, attributeName, attributeValue, CREATE_FLAG));
}
@Test
public void testSetGetXAttrReplace() throws Exception {
AzureBlobFileSystem fs = getFileSystem();
- Assume.assumeTrue(getIsNamespaceEnabled(fs));
- byte[] attributeValue1 = fs.getAbfsStore().encodeAttribute("one");
- byte[] attributeValue2 = fs.getAbfsStore().encodeAttribute("two");
- String attributeName = "user.someAttribute";
- Path testFile = path("replaceXAttr");
+ final Path testFile = new Path("a/b");
+
+ String attributeName = "user.attribute1";
+ String decodedAttributeValue1 = "one";
+ String decodedAttributeValue2 = "two";
+
+ byte[] attributeValue1;
+ byte[] attributeValue2;
+
+ if(fs.getAbfsStore().getPrefixMode() == PrefixMode.BLOB) {
+ Assume.assumeTrue(!getIsNamespaceEnabled(fs)); // Blob endpoint Currently Supports FNS only
+ attributeValue1 = decodedAttributeValue1.getBytes(StandardCharsets.UTF_8);
+ attributeValue2 = decodedAttributeValue2.getBytes(StandardCharsets.UTF_8);
+ }
+ else {
+ attributeValue1 = fs.getAbfsStore().encodeAttribute(decodedAttributeValue1);
+ attributeValue2 = fs.getAbfsStore().encodeAttribute(decodedAttributeValue2);
+ }
// after creating a file, it must not be possible to replace an xAttr
intercept(IOException.class, () -> {
- touch(testFile);
+ fs.create(testFile);
fs.setXAttr(testFile, attributeName, attributeValue1, REPLACE_FLAG);
});
- // however after the xAttr is created, replacing it must succeed
+ // however, after the xAttr is created, replacing it must succeed
fs.setXAttr(testFile, attributeName, attributeValue1, CREATE_FLAG);
fs.setXAttr(testFile, attributeName, attributeValue2, REPLACE_FLAG);
assertArrayEquals(attributeValue2, fs.getXAttr(testFile, attributeName));
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java
index eaf2f94269ed60..122b9ab36d81db 100644
--- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java
+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java
@@ -61,7 +61,7 @@
import org.apache.hadoop.fs.azurebfs.services.AbfsClient;
import org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation;
import org.apache.hadoop.fs.azurebfs.services.AbfsHttpOperation;
-import org.apache.hadoop.fs.azurebfs.services.TestAbfsClient;
+import org.apache.hadoop.fs.azurebfs.services.ITestAbfsClient;
import org.apache.hadoop.fs.azurebfs.utils.TracingContext;
import org.apache.hadoop.fs.azurebfs.utils.TracingHeaderValidator;
import org.mockito.Mockito;
@@ -75,6 +75,7 @@
import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_CLIENT_PROVIDED_ENCRYPTION_KEY;
import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.TRUE;
import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ENABLE_BLOB_MKDIR_OVERWRITE;
+import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_LEASE_CREATE_NON_RECURSIVE;
import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_MKDIRS_FALLBACK_TO_DFS;
import static org.apache.hadoop.fs.azurebfs.constants.FileSystemUriSchemes.ABFS_DNS_PREFIX;
import static org.apache.hadoop.fs.azurebfs.constants.FileSystemUriSchemes.WASB_DNS_PREFIX;
@@ -954,7 +955,9 @@ public void testActiveCreateNonRecursiveDenyParallelReadOnAtomicDir() throws Exc
Assume.assumeTrue(
getFileSystem().getAbfsStore().getAbfsConfiguration().getPrefixMode()
== PrefixMode.BLOB);
- AzureBlobFileSystem fileSystem = (AzureBlobFileSystem) FileSystem.newInstance(getRawConfiguration());
+ Configuration configuration = Mockito.spy(getRawConfiguration());
+ configuration.set(FS_AZURE_LEASE_CREATE_NON_RECURSIVE, "true");
+ AzureBlobFileSystem fileSystem = (AzureBlobFileSystem) FileSystem.newInstance(configuration);
AbfsClient client = Mockito.spy(fileSystem.getAbfsClient());
fileSystem.getAbfsStore().setClient(client);
fileSystem.setWorkingDirectory(new Path("/"));
@@ -994,6 +997,53 @@ public void testActiveCreateNonRecursiveDenyParallelReadOnAtomicDir() throws Exc
Assert.assertTrue(fileSystem.exists(new Path("/hbase/dir/file")));
}
+ /**
+ * Creates /hbase/dir/file1 through the createFile() builder while a second
+ * thread renames /hbase/dir to /hbase/dir2, and asserts that the rename
+ * raised no exception and moved the pre-existing file to /hbase/dir2.
+ * Only runs when the prefix mode is BLOB.
+ * NOTE(review): FS_AZURE_LEASE_CREATE_NON_RECURSIVE is never set here, so the
+ * test presumably relies on that config being disabled by default - confirm.
+ * @throws Exception if any filesystem operation fails
+ */
+ @Test
+ public void testActiveCreateNonRecursiveNotDenyParallelReadOnAtomicDirIfLeaseConfigDisabled() throws Exception {
+ Assume.assumeTrue(
+ getFileSystem().getAbfsStore().getAbfsConfiguration().getPrefixMode()
+ == PrefixMode.BLOB);
+ Configuration configuration = Mockito.spy(getRawConfiguration());
+ AzureBlobFileSystem fileSystem = (AzureBlobFileSystem) FileSystem.newInstance(configuration);
+ // Swap in a spied client so that createPathBlob can be intercepted below.
+ AbfsClient client = Mockito.spy(fileSystem.getAbfsClient());
+ fileSystem.getAbfsStore().setClient(client);
+ fileSystem.setWorkingDirectory(new Path("/"));
+ fileSystem.mkdirs(new Path("/hbase/dir"));
+ fileSystem.create(new Path("/hbase/dir/file"));
+ // Flags used to hand control back and forth between the two threads.
+ AtomicBoolean createCalled = new AtomicBoolean(false);
+ AtomicBoolean parallelRenameDone = new AtomicBoolean(false);
+ AtomicBoolean exceptionCaught = new AtomicBoolean(false);
+
+ // Run the real createPathBlob, signal the renaming thread, then spin until
+ // that thread reports that the rename attempt has finished.
+ Mockito.doAnswer(answer -> {
+ AbfsRestOperation op = (AbfsRestOperation) answer.callRealMethod();
+ createCalled.set(true);
+ while(!parallelRenameDone.get());
+ return op;
+ }).when(client).createPathBlob(Mockito.anyString(), Mockito.anyBoolean(),
+ Mockito.anyBoolean(), Mockito.nullable(HashMap.class), Mockito.nullable(String.class), Mockito.nullable(TracingContext.class));
+
+ // Renaming thread: spins until createPathBlob has been invoked, then renames
+ // the parent directory through getFileSystem() (not the spied instance).
+ new Thread(() -> {
+ try {
+ while(!createCalled.get());
+ getFileSystem().rename(new Path("/hbase/dir/"), new Path("/hbase/dir2"));
+ } catch (Exception e) {
+ // Only the fact that the rename failed is recorded; the cause is dropped.
+ exceptionCaught.set(true);
+ } finally {
+ // Always release the creating thread, even if the rename threw.
+ parallelRenameDone.set(true);
+ }
+ }).start();
+
+ fileSystem.createFile(new Path("/hbase/dir/file1"))
+ .overwrite(false)
+ .replication((short) 1)
+ .bufferSize(1024)
+ .blockSize(1024)
+ .build();
+
+ // The rename must not have thrown, and the original file must have moved.
+ Assert.assertFalse(exceptionCaught.get());
+ Assert.assertFalse(fileSystem.exists(new Path("/hbase/dir/file")));
+ Assert.assertTrue(fileSystem.exists(new Path("/hbase/dir2/file")));
+ }
+
/**
* Attempts to use to the ABFS stream after it is closed.
*/
@@ -1123,9 +1173,15 @@ public void testCreateFileOverwrite(boolean enableConditionalCreateOverwrite)
// One request to server to create path should be issued
// two calls added for -
- // 1. getFileStatus
- // 2. actual create call
+ // 1. getFileStatus : 1
+ // 2. actual create call: 1
createRequestCount+=2;
+
+ // In case of blob endpoint getFileStatus makes additional call to check if path is implicit
+ if (fs.getAbfsStore().getPrefixMode() == PrefixMode.BLOB) {
+ createRequestCount++;
+ }
+
createRequestCount+=ifBlobCheckIfPathDir;
assertAbfsStatistics(
@@ -1226,7 +1282,7 @@ public void testNegativeScenariosForCreateOverwriteDisabled()
// Get mock AbfsClient with current config
AbfsClient
mockClient
- = TestAbfsClient.getMockAbfsClient(
+ = ITestAbfsClient.getMockAbfsClient(
fs.getAbfsStore().getClient(),
fs.getAbfsStore().getAbfsConfiguration());
@@ -1286,6 +1342,13 @@ public void testNegativeScenariosForCreateOverwriteDisabled()
.when(mockClient)
.getPathStatus(any(String.class), eq(false), any(TracingContext.class));
+ doThrow(fileNotFoundResponseEx) // Scn1: GFS fails with Http404
+ .doThrow(serverErrorResponseEx) // Scn2: GFS fails with Http500
+ .doReturn(successOp) // Scn3: create overwrite=true fails with Http412
+ .doReturn(successOp) // Scn4: create overwrite=true fails with Http500
+ .when(mockClient)
+ .getBlobProperty(any(Path.class), any(TracingContext.class));
+
// mock for overwrite=true
doThrow(
preConditionResponseEx) // Scn3: create overwrite=true fails with Http412
@@ -1432,7 +1495,12 @@ private String extractFileEtag(String fileName) throws IOException {
final AzureBlobFileSystem fs = getFileSystem();
final AbfsClient client = fs.getAbfsClient();
final TracingContext testTracingContext = getTestTracingContext(fs, false);
- AbfsRestOperation op = client.getPathStatus(fileName, true, testTracingContext);
+ AbfsRestOperation op;
+ if (fs.getAbfsStore().getPrefixMode() == PrefixMode.BLOB) {
+ op = client.getBlobProperty(new Path(fileName), testTracingContext);
+ } else {
+ op = client.getPathStatus(fileName, true, testTracingContext);
+ }
return AzureBlobFileSystemStore.extractEtagHeader(op.getResult());
}
}
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelete.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelete.java
index bc18c85a9e711c..a2549aa17b5113 100644
--- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelete.java
+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelete.java
@@ -38,7 +38,7 @@
import org.apache.hadoop.fs.azurebfs.services.AbfsClient;
import org.apache.hadoop.fs.azurebfs.services.AbfsHttpOperation;
import org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation;
-import org.apache.hadoop.fs.azurebfs.services.TestAbfsClient;
+import org.apache.hadoop.fs.azurebfs.services.ITestAbfsClient;
import org.apache.hadoop.fs.azurebfs.services.TestAbfsPerfTracker;
import org.apache.hadoop.fs.azurebfs.utils.TestMockHelpers;
import org.apache.hadoop.fs.azurebfs.utils.TracingContext;
@@ -195,7 +195,7 @@ public void testDeleteIdempotency() throws Exception {
final AzureBlobFileSystem fs = getFileSystem();
AbfsClient abfsClient = fs.getAbfsStore().getClient();
- AbfsClient testClient = TestAbfsClient.createTestClientFromCurrentContext(
+ AbfsClient testClient = ITestAbfsClient.createTestClientFromCurrentContext(
abfsClient,
abfsConfig);
@@ -242,7 +242,7 @@ public void testDeleteIdempotency() throws Exception {
public void testDeleteIdempotencyTriggerHttp404() throws Exception {
final AzureBlobFileSystem fs = getFileSystem();
- AbfsClient client = TestAbfsClient.createTestClientFromCurrentContext(
+ AbfsClient client = ITestAbfsClient.createTestClientFromCurrentContext(
fs.getAbfsStore().getClient(),
this.getConfiguration());
@@ -261,7 +261,7 @@ public void testDeleteIdempotencyTriggerHttp404() throws Exception {
getTestTracingContext(fs, true)));
// mock idempotency check to mimic retried case
- AbfsClient mockClient = TestAbfsClient.getMockAbfsClient(
+ AbfsClient mockClient = ITestAbfsClient.getMockAbfsClient(
fs.getAbfsStore().getClient(),
this.getConfiguration());
AzureBlobFileSystemStore mockStore = mock(AzureBlobFileSystemStore.class);
@@ -276,10 +276,10 @@ public void testDeleteIdempotencyTriggerHttp404() throws Exception {
// Case 2: Mimic retried case
// Idempotency check on Delete always returns success
- AbfsRestOperation idempotencyRetOp = TestAbfsClient.getRestOp(
+ AbfsRestOperation idempotencyRetOp = ITestAbfsClient.getRestOp(
DeletePath, mockClient, HTTP_METHOD_DELETE,
- TestAbfsClient.getTestUrl(mockClient, "/NonExistingPath"),
- TestAbfsClient.getTestRequestHeaders(mockClient));
+ ITestAbfsClient.getTestUrl(mockClient, "/NonExistingPath"),
+ ITestAbfsClient.getTestRequestHeaders(mockClient));
idempotencyRetOp.hardSetResult(HTTP_OK);
doReturn(idempotencyRetOp).when(mockClient).deleteIdempotencyCheckOp(any());
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemExplictImplicitRename.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemExplictImplicitRename.java
index 8ffbbadf13a18d..44f2f1b9c94754 100644
--- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemExplictImplicitRename.java
+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemExplictImplicitRename.java
@@ -49,32 +49,6 @@ public void setup() throws Exception {
== PrefixMode.BLOB);
}
- /**
- * For creating directory with implicit parents. Doesn't change already explicit
- * parents.
- */
- void createAzCopyDirectory(Path path) throws Exception {
- AzcopyHelper azcopyHelper = new AzcopyHelper(
- getAccountName(), getFileSystemName(), getFileSystem().getAbfsStore()
- .getAbfsConfiguration()
- .getRawConfiguration(), getFileSystem().getAbfsStore().getPrefixMode());
- azcopyHelper.createFolderUsingAzcopy(
- getFileSystem().makeQualified(path).toUri().getPath().substring(1));
- }
-
- /**
- * For creating files with implicit parents. Doesn't change already explicit
- * parents.
- */
- void createAzCopyFile(Path path) throws Exception {
- AzcopyHelper azcopyHelper = new AzcopyHelper(getAccountName(),
- getFileSystemName(), getFileSystem().getAbfsStore()
- .getAbfsConfiguration()
- .getRawConfiguration(), getFileSystem().getAbfsStore().getPrefixMode());
- azcopyHelper.createFileUsingAzcopy(
- getFileSystem().makeQualified(path).toUri().getPath().substring(1));
- }
-
@Test
public void testRenameSrcFileInImplicitParentDirectory() throws Exception {
AzureBlobFileSystem fs = getFileSystem();
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemFileStatus.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemFileStatus.java
index b9498be89a1ffe..4136da519ec913 100644
--- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemFileStatus.java
+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemFileStatus.java
@@ -27,6 +27,8 @@
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
+import static org.apache.hadoop.test.LambdaTestUtils.intercept;
+
/**
* Test FileStatus.
*/
@@ -140,4 +142,158 @@ public void testLastModifiedTime() throws IOException {
assertTrue("lastModifiedTime should be before createEndTime",
createEndTime > lastModifiedTime);
}
+
+ /**
+ * getFileStatus on a file created through AzCopy, whose parent directory is
+ * implicit, must succeed and report a non-empty file.
+ * @throws Exception if any filesystem operation fails
+ */
+ @Test
+ public void testFileStatusOnFileWithImplicitParent() throws Exception {
+ final AzureBlobFileSystem fs = getFileSystem();
+ final AzcopyHelper azcopy = new AzcopyHelper(getAccountName(), getFileSystemName(),
+ getRawConfiguration(), fs.getAbfsStore().getPrefixMode());
+
+ final Path filePath = new Path("a/b.txt");
+ // Path handed to AzCopy: the qualified path without its leading slash
+ final String blobName = fs.makeQualified(filePath).toUri().getPath().substring(1);
+ azcopy.createFileUsingAzcopy(blobName);
+
+ assertTrue("Parent directory is implicit.",
+ BlobDirectoryStateHelper.isImplicitDirectory(filePath.getParent(), fs));
+
+ // getFileStatus must succeed and describe a non-empty file
+ final FileStatus status = fs.getFileStatus(filePath);
+ assertNotNull(status.getPath());
+ assertFalse(status.isDirectory());
+ assertNotEquals(0L, status.getLen());
+ }
+
+ /**
+ * getFileStatus on a file created through the filesystem, whose parent
+ * directory is explicit, must succeed and report a file.
+ * @throws Exception if any filesystem operation fails
+ */
+ @Test
+ public void testFileStatusOnFileWithExplicitParent() throws Exception {
+ final AzureBlobFileSystem fs = getFileSystem();
+ Path testPath = new Path("a/b.txt");
+ // Close the stream returned by create() so the output stream is not leaked
+ fs.create(testPath).close();
+
+ assertTrue("Parent directory is explicit.",
+ BlobDirectoryStateHelper.isExplicitDirectory(testPath.getParent(), fs));
+
+ FileStatus fileStatus = fs.getFileStatus(testPath);
+ assertNotNull(fileStatus.getPath());
+ assertFalse(fileStatus.isDirectory());
+ }
+
+ /**
+ * getFileStatus on an implicit directory whose parent is also implicit must
+ * succeed and report a zero-length directory.
+ * @throws Exception if any filesystem operation fails
+ */
+ @Test
+ public void testFileStatusOnImplicitDirWithImplicitParent() throws Exception {
+ final AzureBlobFileSystem fs = getFileSystem();
+ final AzcopyHelper azcopy = new AzcopyHelper(getAccountName(), getFileSystemName(),
+ getRawConfiguration(), fs.getAbfsStore().getPrefixMode());
+
+ final Path dirPath = new Path("a/b");
+ // Path handed to AzCopy: the qualified path without its leading slash
+ final String blobName = fs.makeQualified(dirPath).toUri().getPath().substring(1);
+ azcopy.createFolderUsingAzcopy(blobName);
+
+ final Path parentPath = dirPath.getParent();
+ assertTrue("Path is implicit.",
+ BlobDirectoryStateHelper.isImplicitDirectory(dirPath, fs));
+ assertTrue("Parent directory is implicit.",
+ BlobDirectoryStateHelper.isImplicitDirectory(parentPath, fs));
+
+ // getFileStatus must succeed and describe an empty-length directory
+ final FileStatus status = fs.getFileStatus(dirPath);
+ assertNotNull(status.getPath());
+ assertTrue(status.isDirectory());
+ assertEquals(0L, status.getLen());
+ }
+
+ /**
+ * getFileStatus on an implicit directory whose parent has been made explicit
+ * with mkdirs must succeed and report a zero-length directory.
+ * @throws Exception if any filesystem operation fails
+ */
+ @Test
+ public void testFileStatusOnImplicitDirWithExplicitParent() throws Exception {
+ final AzureBlobFileSystem fs = getFileSystem();
+ final AzcopyHelper azcopy = new AzcopyHelper(getAccountName(), getFileSystemName(),
+ getRawConfiguration(), fs.getAbfsStore().getPrefixMode());
+
+ final Path dirPath = new Path("a/b");
+ final Path parentPath = dirPath.getParent();
+ // Path handed to AzCopy: the qualified path without its leading slash
+ final String blobName = fs.makeQualified(dirPath).toUri().getPath().substring(1);
+ azcopy.createFolderUsingAzcopy(blobName);
+ // Only the parent is created through the filesystem
+ fs.mkdirs(parentPath);
+
+ assertTrue("Path is implicit.",
+ BlobDirectoryStateHelper.isImplicitDirectory(dirPath, fs));
+ assertTrue("Parent directory is explicit.",
+ BlobDirectoryStateHelper.isExplicitDirectory(parentPath, fs));
+
+ // getFileStatus must succeed and describe an empty-length directory
+ final FileStatus status = fs.getFileStatus(dirPath);
+ assertNotNull(status.getPath());
+ assertTrue(status.isDirectory());
+ assertEquals(0L, status.getLen());
+ }
+
+ /**
+ * getFileStatus on a directory created with mkdirs, whose parent is explicit
+ * as well, must succeed and report a zero-length directory.
+ * @throws Exception if any filesystem operation fails
+ */
+ @Test
+ public void testFileStatusOnExplicitDirWithExplicitParent() throws Exception {
+ final AzureBlobFileSystem fs = getFileSystem();
+ final Path dirPath = new Path("a/b");
+ fs.mkdirs(dirPath);
+
+ final Path parentPath = dirPath.getParent();
+ assertTrue("Parent directory is explicit.",
+ BlobDirectoryStateHelper.isExplicitDirectory(parentPath, fs));
+ assertTrue("Path is explicit.",
+ BlobDirectoryStateHelper.isExplicitDirectory(dirPath, fs));
+
+ // getFileStatus must succeed and describe an empty-length directory
+ final FileStatus status = fs.getFileStatus(dirPath);
+ assertNotNull(status.getPath());
+ assertTrue(status.isDirectory());
+ assertEquals(0L, status.getLen());
+ }
+
+ /**
+ * getFileStatus on a path that was never created, under an explicit parent
+ * directory, must fail with an IOException.
+ * @throws Exception if any filesystem operation fails unexpectedly
+ */
+ @Test
+ public void testFileStatusOnNonExistingPathWithExplicitParent() throws Exception {
+ final AzureBlobFileSystem fs = getFileSystem();
+ final Path missingPath = new Path("a/b.txt");
+ final Path parentPath = missingPath.getParent();
+ // Only the parent directory is created
+ fs.mkdirs(parentPath);
+
+ assertTrue("Parent directory is explicit.",
+ BlobDirectoryStateHelper.isExplicitDirectory(parentPath, fs));
+
+ // getFileStatus on the missing child must fail
+ intercept(IOException.class, () -> fs.getFileStatus(missingPath));
+ }
+
+ /**
+ * getFileStatus on a path that was never created, under an implicit parent
+ * directory, must fail with an IOException.
+ * @throws Exception if any filesystem operation fails unexpectedly
+ */
+ @Test
+ public void testFileStatusOnNonExistingPathWithImplicitParent() throws Exception {
+ final AzureBlobFileSystem fs = getFileSystem();
+ final AzcopyHelper azcopy = new AzcopyHelper(getAccountName(), getFileSystemName(),
+ getRawConfiguration(), fs.getAbfsStore().getPrefixMode());
+
+ final Path missingPath = new Path("a/b.txt");
+ final Path parentPath = missingPath.getParent();
+ // Only the parent folder is created, and only through AzCopy
+ final String parentBlobName = fs.makeQualified(parentPath).toUri().getPath().substring(1);
+ azcopy.createFolderUsingAzcopy(parentBlobName);
+
+ assertTrue("Parent directory is implicit.",
+ BlobDirectoryStateHelper.isImplicitDirectory(parentPath, fs));
+
+ // getFileStatus on the missing child must fail with IOException
+ intercept(IOException.class, () -> fs.getFileStatus(missingPath));
+ }
+
+ /**
+ * getFileStatus on the root path must succeed and report a zero-length
+ * directory.
+ * @throws Exception if any filesystem operation fails
+ */
+ @Test
+ public void testFileStatusOnRoot() throws Exception {
+ final AzureBlobFileSystem fs = getFileSystem();
+ final Path path = new Path("/");
+ fs.setWorkingDirectory(new Path("/"));
+
+ // Assert that getFileStatus on root path succeeds.
+ FileStatus fileStatus = fs.getFileStatus(path);
+ assertTrue(fileStatus.isDirectory());
+ // assertEquals reports the actual length on failure, as the sibling tests do
+ assertEquals(0L, fileStatus.getLen());
+ }
}
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemLease.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemLease.java
index 2ebdc7492a18d6..14e9df12f47038 100644
--- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemLease.java
+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemLease.java
@@ -20,6 +20,8 @@
import java.io.IOException;
import java.util.concurrent.RejectedExecutionException;
+import org.apache.hadoop.fs.azurebfs.services.AbfsBlobLease;
+import org.apache.hadoop.fs.azurebfs.services.AbfsDfsLease;
import org.apache.hadoop.fs.azurebfs.services.OperativeEndpoint;
import org.apache.hadoop.fs.azurebfs.services.PrefixMode;
import org.junit.Assert;
@@ -347,8 +349,14 @@ public void testAcquireRetry() throws Exception {
FSOperationType.TEST_OP, true, 0);
tracingContext.setListener(listener);
- AbfsLease lease = new AbfsLease(fs.getAbfsClient(),
- testFilePath.toUri().getPath(), tracingContext);
+ AbfsLease lease;
+ if(getPrefixMode(fs) == PrefixMode.BLOB) {
+ lease = new AbfsBlobLease(fs.getAbfsClient(),
+ testFilePath.toUri().getPath(), null, tracingContext);
+ } else {
+ lease = new AbfsDfsLease(fs.getAbfsClient(),
+ testFilePath.toUri().getPath(), null, tracingContext);
+ }
Assert.assertNotNull("Did not successfully lease file", lease.getLeaseID());
listener.setOperation(FSOperationType.RELEASE_LEASE);
lease.free();
@@ -362,7 +370,18 @@ public void testAcquireRetry() throws Exception {
.doCallRealMethod().when(mockClient)
.acquireLease(anyString(), anyInt(), any(TracingContext.class));
- lease = new AbfsLease(mockClient, testFilePath.toUri().getPath(), 5, 1, tracingContext);
+ doThrow(new AbfsLease.LeaseException("failed to acquire 1"))
+ .doThrow(new AbfsLease.LeaseException("failed to acquire 2"))
+ .doCallRealMethod().when(mockClient)
+ .acquireBlobLease(anyString(), anyInt(), any(TracingContext.class));
+
+ if(getPrefixMode(fs) == PrefixMode.BLOB) {
+ lease = new AbfsBlobLease(mockClient, testFilePath.toUri().getPath(), 5, 1, null,
+ tracingContext);
+ } else {
+ lease = new AbfsDfsLease(mockClient, testFilePath.toUri().getPath(), 5, 1, null,
+ tracingContext);
+ }
Assert.assertNotNull("Acquire lease should have retried", lease.getLeaseID());
lease.free();
Assert.assertEquals("Unexpected acquire retry count", 2, lease.getAcquireRetryCount());
@@ -370,9 +389,17 @@ public void testAcquireRetry() throws Exception {
doThrow(new AbfsLease.LeaseException("failed to acquire")).when(mockClient)
.acquireLease(anyString(), anyInt(), any(TracingContext.class));
+ doThrow(new AbfsLease.LeaseException("failed to acquire")).when(mockClient)
+ .acquireBlobLease(anyString(), anyInt(), any(TracingContext.class));
+
LambdaTestUtils.intercept(AzureBlobFileSystemException.class, () -> {
- new AbfsLease(mockClient, testFilePath.toUri().getPath(), 5, 1,
- tracingContext);
+ if(getPrefixMode(fs) == PrefixMode.BLOB) {
+ new AbfsBlobLease(mockClient, testFilePath.toUri().getPath(), 5, 1, null,
+ tracingContext);
+ } else {
+ new AbfsDfsLease(mockClient, testFilePath.toUri().getPath(), 5, 1, null,
+ tracingContext);
+ }
});
}
}
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemMkDir.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemMkDir.java
index 6bed7839935f7c..acdcf66942ab78 100644
--- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemMkDir.java
+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemMkDir.java
@@ -20,8 +20,12 @@
import java.util.UUID;
+import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException;
+import org.apache.hadoop.fs.azurebfs.services.AbfsClient;
import org.apache.hadoop.fs.azurebfs.services.OperativeEndpoint;
import org.apache.hadoop.fs.azurebfs.services.PrefixMode;
+import org.apache.hadoop.fs.azurebfs.services.ITestAbfsClient;
+import org.apache.hadoop.fs.azurebfs.utils.TracingContext;
import org.junit.Assume;
import org.junit.Test;
@@ -29,6 +33,7 @@
import org.apache.hadoop.fs.FileAlreadyExistsException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
+import org.mockito.Mockito;
import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.CONNECTIONS_MADE;
import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ENABLE_MKDIR_OVERWRITE;
@@ -152,4 +157,29 @@ public void testCreateDirOverwrite(boolean enableConditionalCreateOverwrite)
totalConnectionMadeBeforeTest + mkdirRequestCount,
fs.getInstrumentationMap());
}
+
+ /**
+ * On the BLOB prefix mode, checks which client calls are issued around
+ * mkdirs below a directory created with createAzCopyDirectory: getPathStatus
+ * must never be called and getBlobProperty must be called exactly once on the
+ * spied client over the whole test.
+ * @throws Exception if any filesystem operation fails unexpectedly
+ */
+ @Test
+ public void testVerifyGetBlobProperty() throws Exception {
+ Assume.assumeTrue(getFileSystem().getAbfsStore().getPrefixMode() == PrefixMode.BLOB);
+ // Spy the filesystem and its store so that the store can be handed a spied client.
+ AzureBlobFileSystem fs = Mockito.spy(getFileSystem());
+ AzureBlobFileSystemStore store = Mockito.spy(fs.getAbfsStore());
+ Mockito.doReturn(store).when(fs).getAbfsStore();
+ AbfsClient client = store.getClient();
+ AbfsClient testClient = Mockito.spy(ITestAbfsClient.createTestClientFromCurrentContext(
+ client,
+ fs.getAbfsStore().getAbfsConfiguration()));
+ store.setClient(testClient);
+
+ // NOTE(review): createAzCopyDirectory presumably creates /src as an implicit
+ // directory (no marker blob) - confirm against the helper's definition.
+ createAzCopyDirectory(new Path("/src"));
+ // getBlobProperty on /src is expected to fail with AbfsRestOperationException.
+ intercept(AbfsRestOperationException.class, () -> {
+ store.getBlobProperty(new Path("/src"), Mockito.mock(
+ TracingContext.class));
+ });
+ fs.mkdirs(new Path("/src/dir"));
+ // getPathStatus must not have been used at all on the spied client ...
+ Mockito.verify(testClient, Mockito.times(0)).getPathStatus(Mockito.any(String.class),
+ Mockito.anyBoolean(), Mockito.any(TracingContext.class));
+ // ... and getBlobProperty exactly once, counted over every call made above.
+ Mockito.verify(testClient, Mockito.times(1)).getBlobProperty(Mockito.any(Path.class),
+ Mockito.any(TracingContext.class));
+
+ }
}
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRandomRead.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRandomRead.java
index 0a9713cffa94b5..b4a8582c57d78c 100644
--- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRandomRead.java
+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRandomRead.java
@@ -18,14 +18,20 @@
package org.apache.hadoop.fs.azurebfs;
import java.io.EOFException;
+import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Random;
import java.util.concurrent.Callable;
import java.util.UUID;
+import org.apache.hadoop.fs.azurebfs.services.AbfsClient;
+import org.apache.hadoop.fs.azurebfs.services.PrefixMode;
+import org.apache.hadoop.fs.azurebfs.services.ITestAbfsClient;
+import org.apache.hadoop.fs.azurebfs.utils.TracingContext;
import org.junit.Assume;
import org.junit.Ignore;
import org.junit.Test;
+import org.mockito.Mockito;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -574,6 +580,46 @@ public void testAlwaysReadBufferSizeConfig(boolean alwaysReadBufferSizeConfigVal
assertAbfsStatistics(BYTES_RECEIVED, dateSizeReadStatAtStart + newDataSizeRead, fs.getInstrumentationMap());
}
+ /**
+ * On the BLOB prefix mode, opening a file for read must use getBlobProperty
+ * at least once and must never call getPathStatus on the client.
+ * @throws IOException if any filesystem operation fails
+ */
+ @Test
+ public void testReadBlob() throws IOException {
+ Assume.assumeTrue(PrefixMode.BLOB == getFileSystem().getAbfsStore().getPrefixMode());
+ // Spy the filesystem, store and client so that client calls can be verified.
+ AzureBlobFileSystem fs = Mockito.spy(getFileSystem());
+ AzureBlobFileSystemStore store = Mockito.spy(fs.getAbfsStore());
+ AbfsClient client = store.getClient();
+ AbfsClient mockClient = Mockito.spy(ITestAbfsClient.createTestClientFromCurrentContext(
+ client,
+ fs.getAbfsStore().getAbfsConfiguration()
+ ));
+ store.setClient(mockClient);
+ Mockito.doReturn(mockClient).when(store).getClient();
+ Mockito.doReturn(store).when(fs).getAbfsStore();
+
+ Path testPath = new Path("/testReadFile");
+ // NOTE(review): the stream returned by create() is left open as before, so
+ // that the verified client interactions are unchanged.
+ fs.create(testPath);
+ // The input stream is only opened to trigger the client calls; close it
+ // once the interactions have been verified instead of leaking it.
+ try (FSDataInputStream in = fs.open(testPath)) {
+ Mockito.verify(mockClient, Mockito.atLeast(1)).getBlobProperty(
+ Mockito.any(Path.class), Mockito.any(TracingContext.class));
+ Mockito.verify(mockClient, Mockito.times(0)).getPathStatus(
+ Mockito.any(String.class), Mockito.anyBoolean(), Mockito.any(TracingContext.class));
+ }
+ }
+
+ /**
+ * Opening a path that only exists as the parent of a folder created through
+ * AzCopy must fail with FileNotFoundException.
+ * @throws Exception if any filesystem operation fails unexpectedly
+ */
+ @Test
+ public void testInvalidImplicitDirRead() throws Exception {
+ final AzureBlobFileSystem fs = getFileSystem();
+ final AzcopyHelper azcopy = new AzcopyHelper(getAccountName(), getFileSystemName(),
+ getRawConfiguration(), fs.getAbfsStore().getPrefixMode());
+
+ final Path childPath = new Path("/implicitDirPath/testFile");
+ final Path dirPath = new Path("/implicitDirPath");
+ // Path handed to AzCopy: the qualified child path without its leading slash
+ final String childBlobName = fs.makeQualified(childPath).toUri().getPath().substring(1);
+ azcopy.createFolderUsingAzcopy(childBlobName);
+
+ // open() on the parent directory path must be rejected
+ intercept(FileNotFoundException.class, () -> fs.open(dirPath));
+
+ }
private long sequentialRead(String version,
Path testPath,
FileSystem fs,
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java
index 5057664ab2af17..a3eebf3e9dc9cc 100644
--- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java
+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java
@@ -61,6 +61,7 @@
import org.apache.hadoop.fs.azurebfs.utils.TracingContext;
import org.apache.hadoop.fs.azurebfs.services.PrefixMode;
+import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_LEASE_CREATE_NON_RECURSIVE;
import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_REDIRECT_RENAME;
import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.TRUE;
import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_INGRESS_FALLBACK_TO_DFS;
@@ -1735,8 +1736,10 @@ public void testParallelAppendToFileBeingCopiedInAtomicDirectory()
@Test
public void testParallelCreateNonRecursiveToFilePartOfAtomicDirectoryInRename()
throws Exception {
- FileSystem fsCreate = FileSystem.newInstance(getRawConfiguration());
- AzureBlobFileSystem fs = (AzureBlobFileSystem) FileSystem.newInstance(getRawConfiguration());
+ Configuration configuration = Mockito.spy(getRawConfiguration());
+ configuration.set(FS_AZURE_LEASE_CREATE_NON_RECURSIVE, "true");
+ FileSystem fsCreate = FileSystem.newInstance(configuration);
+ AzureBlobFileSystem fs = (AzureBlobFileSystem) FileSystem.newInstance(configuration);
assumeNonHnsAccountBlobEndpoint(fs);
fs.setWorkingDirectory(new Path("/"));
fs.mkdirs(new Path("/hbase/dir1"));
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestCustomerProvidedKey.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestCustomerProvidedKey.java
index 2c0bd31bf8eeb8..8ef5f1d4516703 100644
--- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestCustomerProvidedKey.java
+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestCustomerProvidedKey.java
@@ -202,7 +202,7 @@ public void testAppendWithCPK() throws Exception {
// Trying to append with correct CPK headers
AppendRequestParameters appendRequestParameters =
new AppendRequestParameters(
- 0, 0, 5, Mode.APPEND_MODE, false, null);
+ 0, 0, 5, Mode.APPEND_MODE, false, null, true);
byte[] buffer = getRandomBytesArray(5);
AbfsClient abfsClient = fs.getAbfsClient();
AbfsRestOperation abfsRestOperation = abfsClient
@@ -247,7 +247,7 @@ public void testAppendWithoutCPK() throws Exception {
// Trying to append without CPK headers
AppendRequestParameters appendRequestParameters =
new AppendRequestParameters(
- 0, 0, 5, Mode.APPEND_MODE, false, null);
+ 0, 0, 5, Mode.APPEND_MODE, false, null, true);
byte[] buffer = getRandomBytesArray(5);
AbfsClient abfsClient = fs.getAbfsClient();
AbfsRestOperation abfsRestOperation = abfsClient
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestGetNameSpaceEnabled.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestGetNameSpaceEnabled.java
index ccfdb72cdb0c98..fbfd6884bb363a 100644
--- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestGetNameSpaceEnabled.java
+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestGetNameSpaceEnabled.java
@@ -32,6 +32,7 @@
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.azurebfs.enums.Trilean;
+import org.apache.hadoop.fs.azurebfs.services.PrefixMode;
import org.apache.hadoop.fs.azurebfs.utils.TracingContext;
import static org.apache.hadoop.fs.azurebfs.constants.FileSystemUriSchemes.ABFS_DNS_PREFIX;
@@ -142,11 +143,20 @@ public void testFailedRequestWhenFSNotExist() throws Exception {
+ testUri.substring(testUri.indexOf("@"));
AzureBlobFileSystem fs = this.getFileSystem(nonExistingFsUrl);
- intercept(FileNotFoundException.class,
- "\"The specified filesystem does not exist.\", 404",
- ()-> {
- fs.getFileStatus(new Path("/")); // Run a dummy FS call
- });
+ if (fs.getAbfsStore().getPrefixMode() == PrefixMode.BLOB) {
+ intercept(FileNotFoundException.class,
+ "\"The specified container does not exist.\", 404",
+ ()-> {
+ fs.getFileStatus(new Path("/")); // Run a dummy FS call
+ });
+ }
+ else {
+ intercept(FileNotFoundException.class,
+ "\"The specified filesystem does not exist.\", 404",
+ ()-> {
+ fs.getFileStatus(new Path("/")); // Run a dummy FS call
+ });
+ }
}
@Test
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestListBlobProducer.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestListBlobProducer.java
index 0093f699ce5110..541e17870931bf 100644
--- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestListBlobProducer.java
+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestListBlobProducer.java
@@ -19,6 +19,12 @@
package org.apache.hadoop.fs.azurebfs;
import java.net.HttpURLConnection;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
@@ -34,13 +40,14 @@
import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException;
import org.apache.hadoop.fs.azurebfs.services.AbfsClient;
import org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation;
+import org.apache.hadoop.fs.azurebfs.services.BlobProperty;
import org.apache.hadoop.fs.azurebfs.services.ListBlobConsumer;
import org.apache.hadoop.fs.azurebfs.services.ListBlobProducer;
import org.apache.hadoop.fs.azurebfs.services.ListBlobQueue;
import org.apache.hadoop.fs.azurebfs.services.PrefixMode;
import org.apache.hadoop.fs.azurebfs.utils.TracingContext;
-import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_MAX_CONSUMER_LAG;
+import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_PRODUCER_QUEUE_MAX_SIZE;
public class ITestListBlobProducer extends AbstractAbfsIntegrationTest {
@@ -59,7 +66,7 @@ public void setup() throws Exception {
@Test
public void testProducerWaitingForConsumerLagToGoDown() throws Exception {
Configuration configuration = Mockito.spy(getRawConfiguration());
- configuration.set(FS_AZURE_MAX_CONSUMER_LAG, "10");
+ configuration.set(FS_AZURE_PRODUCER_QUEUE_MAX_SIZE, "10");
AzureBlobFileSystem fs = (AzureBlobFileSystem) FileSystem.newInstance(
configuration);
AbfsClient client = fs.getAbfsClient();
@@ -67,57 +74,67 @@ public void testProducerWaitingForConsumerLagToGoDown() throws Exception {
fs.getAbfsStore().setClient(spiedClient);
fs.setWorkingDirectory(new Path("/"));
fs.mkdirs(new Path("/src"));
+ ExecutorService executor = Executors.newFixedThreadPool(5);
+ List futureList = new ArrayList<>();
for (int i = 0; i < 20; i++) {
- fs.create(new Path("/src/file" + i));
+ int iter = i;
+ futureList.add(executor.submit(() -> {
+ return fs.create(new Path("/src/file" + iter));
+ }));
+ }
+ for(Future future : futureList) {
+ future.get();
}
- AtomicBoolean produced = new AtomicBoolean(true);
AtomicInteger producedBlobs = new AtomicInteger(0);
AtomicInteger listBlobInvoked = new AtomicInteger(0);
+ final ITestListBlobProducer testObj = this;
+ final ListBlobQueue queue = new ListBlobQueue(
+ fs.getAbfsStore().getAbfsConfiguration().getProducerQueueMaxSize(),
+ 1);
+
Mockito.doAnswer(answer -> {
- listBlobInvoked.incrementAndGet();
- AbfsRestOperation op = client.getListBlobs(answer.getArgument(0),
- answer.getArgument(1), 1, answer.getArgument(3));
- producedBlobs.incrementAndGet();
- produced.set(true);
- return op;
+ synchronized (testObj) {
+ listBlobInvoked.incrementAndGet();
+ AbfsRestOperation op = client.getListBlobs(answer.getArgument(0),
+ answer.getArgument(1), 1, answer.getArgument(3));
+ producedBlobs.incrementAndGet();
+ if(producedBlobs.get() > 10) {
+ Assert.assertTrue(queue.availableSize() > 0);
+ }
+ return op;
+ }
})
.when(spiedClient)
.getListBlobs(Mockito.nullable(String.class),
Mockito.nullable(String.class), Mockito.nullable(Integer.class),
Mockito.nullable(TracingContext.class));
- ListBlobQueue queue = new ListBlobQueue(null);
+
ListBlobProducer producer = new ListBlobProducer("src/", spiedClient, queue,
null, Mockito.mock(
TracingContext.class));
ListBlobConsumer consumer = new ListBlobConsumer(queue);
while (producedBlobs.get() < 10) ;
- int producedBlobCount = producedBlobs.get();
-
int oldInvocation = listBlobInvoked.get();
- Thread.sleep(10_000L);
Assert.assertTrue(listBlobInvoked.get() == oldInvocation);
while (!consumer.isCompleted()) {
- produced.set(false);
- consumer.consume();
- while (!produced.get() && !queue.getIsCompleted()) ;
- if (!queue.getIsCompleted()) {
- Assert.assertEquals(producedBlobs.get() - 1, producedBlobCount);
+ synchronized (testObj) {
+ consumer.consume();
+ Assert.assertTrue(queue.availableSize() > 0);
}
- producedBlobCount = producedBlobs.get();
}
- Assert.assertTrue(producedBlobCount == 20);
+ Assert.assertTrue(producedBlobs.get() == 20);
}
@Test
public void testConsumerWhenProducerThrowException() throws Exception {
Configuration configuration = Mockito.spy(getRawConfiguration());
- configuration.set(FS_AZURE_MAX_CONSUMER_LAG, "10");
+ configuration.set(FS_AZURE_PRODUCER_QUEUE_MAX_SIZE, "10");
AzureBlobFileSystem fs = (AzureBlobFileSystem) FileSystem.newInstance(
configuration);
AbfsClient client = fs.getAbfsClient();
@@ -139,7 +156,8 @@ public void testConsumerWhenProducerThrowException() throws Exception {
Mockito.nullable(String.class), Mockito.nullable(Integer.class),
Mockito.nullable(TracingContext.class));
- ListBlobQueue queue = new ListBlobQueue(null);
+ ListBlobQueue queue = new ListBlobQueue(getConfiguration().getProducerQueueMaxSize(),
+ getConfiguration().getProducerQueueMaxSize());
ListBlobProducer producer = new ListBlobProducer("src/", spiedClient, queue,
null, Mockito.mock(
TracingContext.class));
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/constants/TestConfigurationKeys.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/constants/TestConfigurationKeys.java
index 565eb38c4f70a7..9e40f22d231b05 100644
--- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/constants/TestConfigurationKeys.java
+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/constants/TestConfigurationKeys.java
@@ -24,6 +24,9 @@
public final class TestConfigurationKeys {
public static final String FS_AZURE_ACCOUNT_NAME = "fs.azure.account.name";
public static final String FS_AZURE_ABFS_ACCOUNT_NAME = "fs.azure.abfs.account.name";
+ public static final String FS_AZURE_ABFS_ACCOUNT1_NAME = "fs.azure.abfs.account1.name";
+ public static final String FS_AZURE_ENABLE_AUTOTHROTTLING = "fs.azure.enable.autothrottling";
+ public static final String FS_AZURE_ANALYSIS_PERIOD = "fs.azure.analysis.period";
public static final String FS_AZURE_ACCOUNT_KEY = "fs.azure.account.key";
public static final String FS_AZURE_CONTRACT_TEST_URI = "fs.contract.test.fs.abfs";
public static final String FS_AZURE_TEST_NAMESPACE_ENABLED_ACCOUNT = "fs.azure.test.namespace.enabled";
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperationTestUtil.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperationTestUtil.java
index d1c661eea3b780..56556b1930566d 100644
--- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperationTestUtil.java
+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperationTestUtil.java
@@ -41,7 +41,7 @@ public static void addAbfsHttpOpProcessResponseMock(final AbfsRestOperation spie
spiedRestOp.getMethod(), spiedRestOp.getRequestHeaders());
AbfsHttpOperation spiedOp = Mockito.spy(op);
return functionRaisingIOE.apply(spiedOp, actualOp);
- }).when(spiedRestOp).createNewHttpOperation();
+ }).when(spiedRestOp).createHttpOperation();
}
public static void setResult(final AbfsRestOperation op, final AbfsHttpOperation result) {
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsClient.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java
similarity index 62%
rename from hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsClient.java
rename to hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java
index 8dfef876561f73..e798a4baa36ab0 100644
--- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsClient.java
+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java
@@ -20,21 +20,44 @@
import java.io.IOException;
import java.lang.reflect.Field;
+import java.net.HttpURLConnection;
+import java.net.ProtocolException;
import java.net.URL;
import java.util.List;
+import java.util.Random;
import java.util.regex.Pattern;
import org.junit.Ignore;
+import org.assertj.core.api.Assertions;
import org.junit.Test;
+import org.mockito.Mockito;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.azurebfs.AbfsConfiguration;
+import org.apache.hadoop.fs.azurebfs.AbstractAbfsIntegrationTest;
+import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem;
+import org.apache.hadoop.fs.azurebfs.TestAbfsConfigurationFieldsValidation;
+import org.apache.hadoop.fs.azurebfs.constants.FSOperationType;
import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException;
+import org.apache.hadoop.fs.azurebfs.contracts.services.AppendRequestParameters;
import org.apache.hadoop.fs.azurebfs.oauth2.AccessTokenProvider;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys;
+import org.apache.hadoop.fs.azurebfs.utils.TracingContext;
+import org.apache.hadoop.fs.azurebfs.utils.TracingHeaderFormat;
import org.apache.hadoop.security.ssl.DelegatingSSLSocketFactory;
-import static org.assertj.core.api.Assertions.assertThat;
+import static java.net.HttpURLConnection.HTTP_NOT_FOUND;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.APPEND_ACTION;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_PATCH;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_PUT;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HUNDRED_CONTINUE;
+import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.EXPECT;
+import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_HTTP_METHOD_OVERRIDE;
+import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_ACTION;
+import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_POSITION;
+import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.FS_AZURE_ABFS_ACCOUNT_NAME;
+import static org.apache.hadoop.test.LambdaTestUtils.intercept;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
@@ -60,14 +83,19 @@
* Test useragent of abfs client.
*
*/
-public final class TestAbfsClient {
+public final class ITestAbfsClient extends AbstractAbfsIntegrationTest {
private static final String ACCOUNT_NAME = "bogusAccountName.dfs.core.windows.net";
private static final String FS_AZURE_USER_AGENT_PREFIX = "Partner Service";
+ private static final String TEST_PATH = "/testfile";
+ public static final int REDUCED_RETRY_COUNT = 2;
+ public static final int REDUCED_BACKOFF_INTERVAL = 100;
+ public static final int BUFFER_LENGTH = 5;
+ public static final int BUFFER_OFFSET = 0;
private final Pattern userAgentStringPattern;
- public TestAbfsClient(){
+ public ITestAbfsClient() throws Exception {
StringBuilder regEx = new StringBuilder();
regEx.append("^");
regEx.append(APN_VERSION);
@@ -125,7 +153,7 @@ public void verifybBasicInfo() throws Exception {
}
private void verifybBasicInfo(String userAgentStr) {
- assertThat(userAgentStr)
+ Assertions.assertThat(userAgentStr)
.describedAs("User-Agent string [" + userAgentStr
+ "] should be of the pattern: " + this.userAgentStringPattern.pattern())
.matches(this.userAgentStringPattern)
@@ -155,7 +183,7 @@ public void verifyUserAgentPrefix()
String userAgentStr = getUserAgentString(abfsConfiguration, false);
verifybBasicInfo(userAgentStr);
- assertThat(userAgentStr)
+ Assertions.assertThat(userAgentStr)
.describedAs("User-Agent string should contain " + FS_AZURE_USER_AGENT_PREFIX)
.contains(FS_AZURE_USER_AGENT_PREFIX);
@@ -165,7 +193,7 @@ public void verifyUserAgentPrefix()
userAgentStr = getUserAgentString(abfsConfiguration, false);
verifybBasicInfo(userAgentStr);
- assertThat(userAgentStr)
+ Assertions.assertThat(userAgentStr)
.describedAs("User-Agent string should not contain " + FS_AZURE_USER_AGENT_PREFIX)
.doesNotContain(FS_AZURE_USER_AGENT_PREFIX);
}
@@ -181,14 +209,14 @@ public void verifyUserAgentWithoutSSLProvider() throws Exception {
String userAgentStr = getUserAgentString(abfsConfiguration, true);
verifybBasicInfo(userAgentStr);
- assertThat(userAgentStr)
+ Assertions.assertThat(userAgentStr)
.describedAs("User-Agent string should contain sslProvider")
.contains(DelegatingSSLSocketFactory.getDefaultFactory().getProviderName());
userAgentStr = getUserAgentString(abfsConfiguration, false);
verifybBasicInfo(userAgentStr);
- assertThat(userAgentStr)
+ Assertions.assertThat(userAgentStr)
.describedAs("User-Agent string should not contain sslProvider")
.doesNotContain(DelegatingSSLSocketFactory.getDefaultFactory().getProviderName());
}
@@ -204,7 +232,7 @@ public void verifyUserAgentClusterName() throws Exception {
String userAgentStr = getUserAgentString(abfsConfiguration, false);
verifybBasicInfo(userAgentStr);
- assertThat(userAgentStr)
+ Assertions.assertThat(userAgentStr)
.describedAs("User-Agent string should contain cluster name")
.contains(clusterName);
@@ -214,7 +242,7 @@ public void verifyUserAgentClusterName() throws Exception {
userAgentStr = getUserAgentString(abfsConfiguration, false);
verifybBasicInfo(userAgentStr);
- assertThat(userAgentStr)
+ Assertions.assertThat(userAgentStr)
.describedAs("User-Agent string should not contain cluster name")
.doesNotContain(clusterName)
.describedAs("User-Agent string should contain UNKNOWN as cluster name config is absent")
@@ -232,7 +260,7 @@ public void verifyUserAgentClusterType() throws Exception {
String userAgentStr = getUserAgentString(abfsConfiguration, false);
verifybBasicInfo(userAgentStr);
- assertThat(userAgentStr)
+ Assertions.assertThat(userAgentStr)
.describedAs("User-Agent string should contain cluster type")
.contains(clusterType);
@@ -242,7 +270,7 @@ public void verifyUserAgentClusterType() throws Exception {
userAgentStr = getUserAgentString(abfsConfiguration, false);
verifybBasicInfo(userAgentStr);
- assertThat(userAgentStr)
+ Assertions.assertThat(userAgentStr)
.describedAs("User-Agent string should not contain cluster type")
.doesNotContain(clusterType)
.describedAs("User-Agent string should contain UNKNOWN as cluster type config is absent")
@@ -308,24 +336,28 @@ public static AbfsClient getMockAbfsClient(AbfsClient baseAbfsClientInstance,
when(client.getAccessToken()).thenCallRealMethod();
when(client.getSharedKeyCredentials()).thenCallRealMethod();
when(client.createDefaultHeaders()).thenCallRealMethod();
-
+ when(client.getAbfsConfiguration()).thenReturn(abfsConfig);
+ when(client.getIntercept()).thenReturn(
+ AbfsThrottlingInterceptFactory.getInstance(
+ abfsConfig.getAccountName().substring(0,
+ abfsConfig.getAccountName().indexOf(DOT)), abfsConfig));
// override baseurl
- client = TestAbfsClient.setAbfsClientField(client, "abfsConfiguration",
+ client = ITestAbfsClient.setAbfsClientField(client, "abfsConfiguration",
abfsConfig);
// override baseurl
- client = TestAbfsClient.setAbfsClientField(client, "baseUrl",
+ client = ITestAbfsClient.setAbfsClientField(client, "baseUrl",
baseAbfsClientInstance.getBaseUrl());
// override auth provider
if (currentAuthType == AuthType.SharedKey) {
- client = TestAbfsClient.setAbfsClientField(client, "sharedKeyCredentials",
+ client = ITestAbfsClient.setAbfsClientField(client, "sharedKeyCredentials",
new SharedKeyCredentials(
abfsConfig.getAccountName().substring(0,
abfsConfig.getAccountName().indexOf(DOT)),
abfsConfig.getStorageAccountKey()));
} else {
- client = TestAbfsClient.setAbfsClientField(client, "tokenProvider",
+ client = ITestAbfsClient.setAbfsClientField(client, "tokenProvider",
abfsConfig.getTokenProvider());
}
@@ -333,7 +365,7 @@ public static AbfsClient getMockAbfsClient(AbfsClient baseAbfsClientInstance,
String userAgent = "APN/1.0 Azure Blob FS/3.4.0-SNAPSHOT (PrivateBuild "
+ "JavaJRE 1.8.0_252; Linux 5.3.0-59-generic/amd64; openssl-1.0; "
+ "UNKNOWN/UNKNOWN) MSFT";
- client = TestAbfsClient.setAbfsClientField(client, "userAgent", userAgent);
+ client = ITestAbfsClient.setAbfsClientField(client, "userAgent", userAgent);
return client;
}
@@ -397,4 +429,160 @@ public static AbfsRestOperation getRestOp(AbfsRestOperationType type,
url,
requestHeaders);
}
+
+ public static AccessTokenProvider getAccessTokenProvider(AbfsClient client) {
+ return client.getTokenProvider();
+ }
+
+ /**
+ * Test helper method to get random bytes array.
+ * @param length The length of byte buffer.
+ * @return byte buffer.
+ */
+ private byte[] getRandomBytesArray(int length) {
+ final byte[] b = new byte[length];
+ new Random().nextBytes(b);
+ return b;
+ }
+
+ /**
+ * Test to verify that client retries append request without
+ * expect header enabled if append with expect header enabled fails
+ * with 4xx kind of error.
+ * @throws Exception
+ */
+ @Test
+ public void testExpectHundredContinue() throws Exception {
+ // Get the filesystem.
+ final AzureBlobFileSystem fs = getFileSystem();
+
+ final Configuration configuration = new Configuration();
+ configuration.addResource(TEST_CONFIGURATION_FILE_NAME);
+ AbfsClient abfsClient = getClient(fs);
+
+ AbfsConfiguration abfsConfiguration = new AbfsConfiguration(configuration,
+ configuration.get(FS_AZURE_ABFS_ACCOUNT_NAME));
+
+ // Update the configuration with reduced retry count and reduced backoff interval.
+ AbfsConfiguration abfsConfig
+ = TestAbfsConfigurationFieldsValidation.updateRetryConfigs(
+ abfsConfiguration,
+ REDUCED_RETRY_COUNT, REDUCED_BACKOFF_INTERVAL);
+
+ // Create a spied test client that uses the reduced-retry configuration.
+ AbfsClient testClient = Mockito.spy(
+ ITestAbfsClient.createTestClientFromCurrentContext(
+ abfsClient,
+ abfsConfig));
+
+ // Create the append request params with expect header enabled initially.
+ AppendRequestParameters appendRequestParameters
+ = new AppendRequestParameters(
+ BUFFER_OFFSET, BUFFER_OFFSET, BUFFER_LENGTH,
+ AppendRequestParameters.Mode.APPEND_MODE, false, null, true);
+
+ byte[] buffer = getRandomBytesArray(BUFFER_LENGTH);
+
+ // Create a test file to upload the data to.
+ Path testPath = path(TEST_PATH);
+ fs.create(testPath);
+ String finalTestPath = testPath.toString()
+ .substring(testPath.toString().lastIndexOf("/"));
+
+ // Creates a list of request headers.
+ final List requestHeaders
+ = ITestAbfsClient.getTestRequestHeaders(testClient);
+ requestHeaders.add(
+ new AbfsHttpHeader(X_HTTP_METHOD_OVERRIDE, HTTP_METHOD_PATCH));
+ if (appendRequestParameters.isExpectHeaderEnabled()) {
+ requestHeaders.add(new AbfsHttpHeader(EXPECT, HUNDRED_CONTINUE));
+ }
+
+ // Updates the query parameters.
+ final AbfsUriQueryBuilder abfsUriQueryBuilder
+ = testClient.createDefaultUriQueryBuilder();
+ abfsUriQueryBuilder.addQuery(QUERY_PARAM_ACTION, APPEND_ACTION);
+ abfsUriQueryBuilder.addQuery(QUERY_PARAM_POSITION,
+ Long.toString(appendRequestParameters.getPosition()));
+
+ // Creates the url for the specified path.
+ URL url = testClient.createRequestUrl(finalTestPath, abfsUriQueryBuilder.toString());
+
+ // Create a spy of the AbfsRestOperation to set the urlConnection in the corresponding httpOperation.
+ AbfsRestOperation op = Mockito.spy(new AbfsRestOperation(
+ AbfsRestOperationType.Append,
+ testClient,
+ HTTP_METHOD_PUT,
+ url,
+ requestHeaders, buffer,
+ appendRequestParameters.getoffset(),
+ appendRequestParameters.getLength(), null));
+
+ AbfsHttpOperation abfsHttpOperation = Mockito.spy(new AbfsHttpOperation(url,
+ HTTP_METHOD_PUT, requestHeaders));
+
+ // Sets the expect request property if expect header is enabled.
+ if (appendRequestParameters.isExpectHeaderEnabled()) {
+ Mockito.doReturn(HUNDRED_CONTINUE).when(abfsHttpOperation)
+ .getConnProperty(EXPECT);
+ }
+
+ HttpURLConnection urlConnection = mock(HttpURLConnection.class);
+ Mockito.doNothing().when(urlConnection).setRequestProperty(Mockito
+ .any(), Mockito.any());
+ Mockito.doReturn(HTTP_METHOD_PUT).when(urlConnection).getRequestMethod();
+ Mockito.doReturn(url).when(urlConnection).getURL();
+ Mockito.doReturn(urlConnection).when(abfsHttpOperation).getConnection();
+
+ Mockito.doNothing().when(abfsHttpOperation).setRequestProperty(Mockito
+ .any(), Mockito.any());
+ Mockito.doReturn(url).when(abfsHttpOperation).getConnUrl();
+
+ // Give user error code 404 when processResponse is called.
+ Mockito.doReturn(HTTP_METHOD_PUT).when(abfsHttpOperation).getConnRequestMethod();
+ Mockito.doReturn(HTTP_NOT_FOUND).when(abfsHttpOperation).getConnResponseCode();
+ Mockito.doReturn("Resource Not Found")
+ .when(abfsHttpOperation)
+ .getConnResponseMessage();
+
+ // Make getConnOutputStream throw a ProtocolException (an IOException) to check that sendRequest returns correctly.
+ Mockito.doThrow(new ProtocolException("Server rejected Operation"))
+ .when(abfsHttpOperation)
+ .getConnOutputStream();
+
+ // Sets the httpOperation for the rest operation.
+ Mockito.doReturn(abfsHttpOperation)
+ .when(op)
+ .createHttpOperation();
+
+ // Stub the client so that it returns the spied rest operation for append requests.
+ Mockito.doReturn(op)
+ .when(testClient)
+ .getAbfsRestOperationForAppend(Mockito.any(),
+ Mockito.any(), Mockito.any(), Mockito.any(), Mockito.any(),
+ Mockito.nullable(int.class), Mockito.nullable(int.class),
+ Mockito.any());
+
+ TracingContext tracingContext = Mockito.spy(new TracingContext("abcd",
+ "abcde", FSOperationType.APPEND,
+ TracingHeaderFormat.ALL_ID_FORMAT, null));
+
+ // Check that expect header is enabled before the append call.
+ Assertions.assertThat(appendRequestParameters.isExpectHeaderEnabled())
+ .describedAs("The expect header is not true before the append call")
+ .isTrue();
+
+ intercept(AzureBlobFileSystemException.class,
+ () -> testClient.append(finalTestPath, buffer, appendRequestParameters, null, tracingContext));
+
+ // Verify that the request was not exponentially retried because of user error.
+ Assertions.assertThat(tracingContext.getRetryCount())
+ .describedAs("The retry count is incorrect")
+ .isEqualTo(0);
+
+ // Verify that the same request was retried with expect header disabled.
+ Assertions.assertThat(appendRequestParameters.isExpectHeaderEnabled())
+ .describedAs("The expect header is not false")
+ .isFalse();
+ }
}
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsRestOperation.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsRestOperation.java
new file mode 100644
index 00000000000000..fe3c2a9892c4c6
--- /dev/null
+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsRestOperation.java
@@ -0,0 +1,358 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.azurebfs.services;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.net.HttpURLConnection;
+import java.net.ProtocolException;
+import java.net.URL;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Random;
+
+import org.assertj.core.api.Assertions;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.mockito.Mockito;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.azurebfs.AbfsConfiguration;
+import org.apache.hadoop.fs.azurebfs.AbstractAbfsIntegrationTest;
+import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem;
+import org.apache.hadoop.fs.azurebfs.TestAbfsConfigurationFieldsValidation;
+import org.apache.hadoop.fs.azurebfs.constants.FSOperationType;
+import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException;
+import org.apache.hadoop.fs.azurebfs.contracts.services.AppendRequestParameters;
+import org.apache.hadoop.fs.azurebfs.utils.TracingContext;
+import org.apache.hadoop.fs.azurebfs.utils.TracingHeaderFormat;
+
+import static java.net.HttpURLConnection.HTTP_NOT_FOUND;
+import static java.net.HttpURLConnection.HTTP_OK;
+import static java.net.HttpURLConnection.HTTP_UNAVAILABLE;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.APPEND_ACTION;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_PATCH;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_PUT;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HUNDRED_CONTINUE;
+import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.EXPECT;
+import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_HTTP_METHOD_OVERRIDE;
+import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_ACTION;
+import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_POSITION;
+import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.FS_AZURE_ABFS_ACCOUNT_NAME;
+import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.TEST_CONFIGURATION_FILE_NAME;
+import static org.apache.hadoop.test.LambdaTestUtils.intercept;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.never;
+import static org.mockito.Mockito.times;
+
+@RunWith(Parameterized.class)
+public class ITestAbfsRestOperation extends AbstractAbfsIntegrationTest {
+
+ // Specifies whether getOutputStream() or write() throws IOException.
+ public enum ErrorType {OUTPUTSTREAM, WRITE};
+
+ private static final int HTTP_EXPECTATION_FAILED = 417;
+ private static final int HTTP_ERROR = 0;
+ private static final int ZERO = 0;
+ private static final int REDUCED_RETRY_COUNT = 2;
+ private static final int REDUCED_BACKOFF_INTERVAL = 100;
+ private static final int BUFFER_LENGTH = 5;
+ private static final int BUFFER_OFFSET = 0;
+ private static final String TEST_PATH = "/testfile";
+
+ // Specifies whether the expect header is enabled or not.
+ @Parameterized.Parameter
+ public boolean expectHeaderEnabled;
+
+ // Gives the http response code.
+ @Parameterized.Parameter(1)
+ public int responseCode;
+
+ // Gives the http response message.
+ @Parameterized.Parameter(2)
+ public String responseMessage;
+
+ // Gives the errorType based on the enum.
+ @Parameterized.Parameter(3)
+ public ErrorType errorType;
+
+ // The intercept.
+ private AbfsThrottlingIntercept intercept;
+
+ /*
+ HTTP_OK = 200,
+ HTTP_UNAVAILABLE = 503,
+ HTTP_NOT_FOUND = 404,
+ HTTP_EXPECTATION_FAILED = 417,
+ HTTP_ERROR = 0.
+ */
+ @Parameterized.Parameters(name = "expect={0}-code={1}-ErrorType={3}")
+ public static Iterable