diff --git a/hadoop-tools/hadoop-azure/src/config/checkstyle-suppressions.xml b/hadoop-tools/hadoop-azure/src/config/checkstyle-suppressions.xml index 070c8c1fe827ae..2065746b766116 100644 --- a/hadoop-tools/hadoop-azure/src/config/checkstyle-suppressions.xml +++ b/hadoop-tools/hadoop-azure/src/config/checkstyle-suppressions.xml @@ -48,4 +48,11 @@ files="org[\\/]apache[\\/]hadoop[\\/]fs[\\/]azurebfs[\\/]utils[\\/]Base64.java"/> + + + + diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/AzureNativeFileSystemStore.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/AzureNativeFileSystemStore.java index d7ba3debec209f..af2b696964451c 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/AzureNativeFileSystemStore.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/AzureNativeFileSystemStore.java @@ -807,7 +807,7 @@ public void initialize(URI uri, Configuration conf, AzureFileSystemInstrumentati LOG.debug("Page blob directories: {}", setToString(pageBlobDirs)); // User-agent - userAgentId = "wasbdriverV2.1"; + userAgentId = "wasbdriverV2.2"; // Extract the directories that should contain block blobs with compaction blockBlobWithCompationDirs = getDirectorySet( diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java index 771f95a8b64e63..4179da21fd5e9e 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.lang.reflect.Field; +import org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys; import org.apache.hadoop.fs.azurebfs.services.PrefixMode; import 
org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; @@ -119,6 +120,15 @@ public class AbfsConfiguration{ DefaultValue = DEFAULT_OPTIMIZE_FOOTER_READ) private boolean optimizeFooterRead; + @BooleanConfigurationValidatorAnnotation( + ConfigurationKey = FS_AZURE_ACCOUNT_IS_EXPECT_HEADER_ENABLED, + DefaultValue = DEFAULT_FS_AZURE_ACCOUNT_IS_EXPECT_HEADER_ENABLED) + private boolean isExpectHeaderEnabled; + + @BooleanConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_ACCOUNT_LEVEL_THROTTLING_ENABLED, + DefaultValue = DEFAULT_FS_AZURE_ACCOUNT_LEVEL_THROTTLING_ENABLED) + private boolean accountThrottlingEnabled; + @IntegerConfigurationValidatorAnnotation(ConfigurationKey = AZURE_READ_BUFFER_SIZE, MinValue = MIN_BUFFER_SIZE, MaxValue = MAX_BUFFER_SIZE, @@ -246,7 +256,7 @@ public class AbfsConfiguration{ private int readAheadQueueDepth; @IntegerConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_BLOB_DIR_RENAME_MAX_THREAD, - DefaultValue = 0) + DefaultValue = DEFAULT_FS_AZURE_BLOB_RENAME_THREAD) private int blobDirRenameMaxThread; @LongConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_BLOB_COPY_PROGRESS_POLL_WAIT_MILLIS, @@ -275,6 +285,14 @@ public class AbfsConfiguration{ DefaultValue = DEFAULT_ENABLE_AUTOTHROTTLING) private boolean enableAutoThrottling; + @IntegerConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_ACCOUNT_OPERATION_IDLE_TIMEOUT, + DefaultValue = DEFAULT_ACCOUNT_OPERATION_IDLE_TIMEOUT_MS) + private int accountOperationIdleTimeout; + + @IntegerConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_ANALYSIS_PERIOD, + DefaultValue = DEFAULT_ANALYSIS_PERIOD_MS) + private int analysisPeriod; + @StringConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_USER_AGENT_PREFIX_KEY, DefaultValue = DEFAULT_FS_AZURE_USER_AGENT_PREFIX) private String userAgentId; @@ -326,8 +344,12 @@ public class AbfsConfiguration{ 
FS_AZURE_ENABLE_ABFS_LIST_ITERATOR, DefaultValue = DEFAULT_ENABLE_ABFS_LIST_ITERATOR) private boolean enableAbfsListIterator; - @IntegerConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_MAX_CONSUMER_LAG, DefaultValue = DEFAULT_FS_AZURE_MAX_CONSUMER_LAG) - private int maximumConsumerLag; + @IntegerConfigurationValidatorAnnotation(ConfigurationKey = + FS_AZURE_PRODUCER_QUEUE_MAX_SIZE, DefaultValue = DEFAULT_FS_AZURE_PRODUCER_QUEUE_MAX_SIZE) + private int producerQueueMaxSize; + + @BooleanConfigurationValidatorAnnotation(ConfigurationKey=FS_AZURE_LEASE_CREATE_NON_RECURSIVE, DefaultValue = DEFAULT_FS_AZURE_LEASE_CREATE_NON_RECURSIVE) + private boolean leaseOnCreateNonRecursive; public AbfsConfiguration(final Configuration rawConfig, String accountName) throws IllegalAccessException, InvalidConfigurationValueException, IOException { @@ -408,6 +430,11 @@ public boolean shouldEnableBlobEndPoint() { DefaultValue = DEFAULT_FS_AZURE_INGRESS_FALLBACK_TO_DFS) private boolean ingressFallbackToDfs; + @BooleanConfigurationValidatorAnnotation( + ConfigurationKey = FS_AZURE_READ_FALLBACK_TO_DFS, + DefaultValue = DEFAULT_AZURE_READ_FALLBACK_TO_DFS) + private boolean readFallbackToDfs; + public boolean shouldMkdirFallbackToDfs() { return mkdirFallbackToDfs; } @@ -416,6 +443,10 @@ public boolean shouldIngressFallbackToDfs() { return ingressFallbackToDfs; } + public boolean shouldReadFallbackToDfs() { + return readFallbackToDfs; + } + /** * Gets the Azure Storage account name corresponding to this instance of configuration. 
* @return the Azure Storage account name @@ -768,6 +799,14 @@ public String getAppendBlobDirs() { return this.azureAppendBlobDirs; } + public boolean isExpectHeaderEnabled() { + return this.isExpectHeaderEnabled; + } + + public boolean accountThrottlingEnabled() { + return accountThrottlingEnabled; + } + public String getAzureInfiniteLeaseDirs() { return this.azureInfiniteLeaseDirs; } @@ -810,8 +849,16 @@ public boolean isAutoThrottlingEnabled() { return this.enableAutoThrottling; } + public int getAccountOperationIdleTimeout() { + return accountOperationIdleTimeout; + } + + public int getAnalysisPeriod() { + return analysisPeriod; + } + public String getCustomUserAgentPrefix() { - return "abfsdriverV2.1"; + return "abfsdriverV2.2"; } public String getClusterName() { @@ -1184,8 +1231,11 @@ public void setEnableAbfsListIterator(boolean enableAbfsListIterator) { this.enableAbfsListIterator = enableAbfsListIterator; } - public int getMaximumConsumerLag() { - return maximumConsumerLag; + public int getProducerQueueMaxSize() { + return producerQueueMaxSize; } + public boolean isLeaseOnCreateNonRecursive() { + return leaseOnCreateNonRecursive; + } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java index 0e22f5e4334e90..d79acfe3202a3c 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java @@ -26,6 +26,7 @@ import java.net.HttpURLConnection; import java.net.URI; import java.net.URISyntaxException; +import java.nio.charset.StandardCharsets; import java.nio.file.AccessDeniedException; import java.util.Hashtable; import java.util.List; @@ -58,7 +59,6 @@ import org.apache.commons.lang3.ArrayUtils; import org.apache.hadoop.fs.azurebfs.services.AbfsClient; -import 
org.apache.hadoop.fs.azurebfs.services.AbfsClientThrottlingIntercept; import org.apache.hadoop.fs.azurebfs.services.AbfsListStatusRemoteIterator; import org.apache.hadoop.fs.RemoteIterator; import org.apache.hadoop.classification.InterfaceStability; @@ -114,6 +114,7 @@ import static org.apache.hadoop.fs.CommonConfigurationKeys.IOSTATISTICS_LOGGING_LEVEL; import static org.apache.hadoop.fs.CommonConfigurationKeys.IOSTATISTICS_LOGGING_LEVEL_DEFAULT; import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.*; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.BLOB_LEASE_ONE_MINUTE_DURATION; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ENABLE_BLOB_ENDPOINT; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemUriSchemes.ABFS_DNS_PREFIX; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemUriSchemes.WASB_DNS_PREFIX; @@ -126,6 +127,8 @@ import static org.apache.hadoop.fs.azurebfs.services.AbfsErrors.PATH_EXISTS; import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.RENAME_DESTINATION_PARENT_PATH_NOT_FOUND; import static org.apache.hadoop.fs.azurebfs.constants.InternalConstants.CAPABILITY_SAFE_READAHEAD; +import static org.apache.hadoop.fs.azurebfs.utils.UriUtils.decodeMetadataAttribute; +import static org.apache.hadoop.fs.azurebfs.utils.UriUtils.encodeMetadataAttribute; import static org.apache.hadoop.fs.impl.PathCapabilitiesSupport.validatePathCapabilityArgs; import static org.apache.hadoop.fs.statistics.IOStatisticsLogging.logIOStatisticsAtLevel; import static org.apache.hadoop.util.functional.RemoteIterators.filteringRemoteIterator; @@ -262,7 +265,6 @@ public void initialize(URI uri, Configuration configuration) } } - AbfsClientThrottlingIntercept.initializeSingleton(abfsConfiguration.isAutoThrottlingEnabled()); boolean isRedirect = abfsConfiguration.isRedirection(); if (isRedirect) { String abfsUrl = uri.toString(); @@ -365,7 +367,7 @@ private 
FSDataInputStream open(final Path path, TracingContext tracingContext = new TracingContext(clientCorrelationId, fileSystemId, FSOperationType.OPEN, tracingHeaderFormat, listener); - InputStream inputStream = abfsStore.openFileForRead(qualifiedPath, + InputStream inputStream = getAbfsStore().openFileForRead(qualifiedPath, options, statistics, tracingContext); return new FSDataInputStream(inputStream); } catch(AzureBlobFileSystemException ex) { @@ -510,8 +512,10 @@ public FSDataOutputStream createNonRecursive(final Path f, final FsPermission pe String parentPath = parent.toUri().getPath(); if (getAbfsStore().getAbfsConfiguration().getPrefixMode() == PrefixMode.BLOB && getAbfsStore().isAtomicRenameKey(parentPath)) { - abfsBlobLease = new AbfsBlobLease(getAbfsClient(), - parentPath, tracingContext); + if(getAbfsStore().getAbfsConfiguration().isLeaseOnCreateNonRecursive()) { + abfsBlobLease = new AbfsBlobLease(getAbfsClient(), + parentPath, BLOB_LEASE_ONE_MINUTE_DURATION, tracingContext); + } } final FileStatus parentFileStatus = tryGetFileStatus(parent, tracingContext); @@ -1046,10 +1050,21 @@ private FileStatus getFileStatus(final Path path, LOG.debug("AzureBlobFileSystem.getFileStatus path: {}", path); statIncrement(CALL_GET_FILE_STATUS); Path qualifiedPath = makeQualified(path); + FileStatus fileStatus; try { - FileStatus fileStatus = abfsStore.getFileStatus(qualifiedPath, - tracingContext); + if (abfsStore.getPrefixMode() == PrefixMode.BLOB) { + /** + * Get File Status over Blob Endpoint will Have an additional call + * to check if directory is implicit. 
+ */ + fileStatus = abfsStore.getFileStatusOverBlob(qualifiedPath, + tracingContext); + } + else { + fileStatus = abfsStore.getFileStatus(qualifiedPath, + tracingContext); + } if (getAbfsStore().getAbfsConfiguration().getPrefixMode() == PrefixMode.BLOB && fileStatus != null && fileStatus.isDirectory() && @@ -1314,13 +1329,30 @@ public void setXAttr(final Path path, final String name, final byte[] value, fin TracingContext tracingContext = new TracingContext(clientCorrelationId, fileSystemId, FSOperationType.SET_ATTR, true, tracingHeaderFormat, listener); - Hashtable properties = abfsStore - .getPathStatus(qualifiedPath, tracingContext); + Hashtable properties; String xAttrName = ensureValidAttributeName(name); + String xAttrValue; + + if (abfsStore.getPrefixMode() == PrefixMode.BLOB) { + properties = abfsStore.getBlobMetadata(qualifiedPath, tracingContext); + + boolean xAttrExists = properties.containsKey(xAttrName); + XAttrSetFlag.validate(name, xAttrExists, flag); + + // On Blob Endpoint metadata are passed as HTTP Request Headers + // Values in UTF_8 needed to be URL encoded after decoding into String + xAttrValue = encodeMetadataAttribute(new String(value, StandardCharsets.UTF_8)); + properties.put(xAttrName, xAttrValue); + abfsStore.setBlobMetadata(qualifiedPath, properties, tracingContext); + + return; + } + + properties = abfsStore.getPathStatus(qualifiedPath, tracingContext); boolean xAttrExists = properties.containsKey(xAttrName); XAttrSetFlag.validate(name, xAttrExists, flag); - String xAttrValue = abfsStore.decodeAttribute(value); + xAttrValue = abfsStore.decodeAttribute(value); properties.put(xAttrName, xAttrValue); abfsStore.setPathProperties(qualifiedPath, properties, tracingContext); } catch (AzureBlobFileSystemException ex) { @@ -1354,9 +1386,21 @@ public byte[] getXAttr(final Path path, final String name) TracingContext tracingContext = new TracingContext(clientCorrelationId, fileSystemId, FSOperationType.GET_ATTR, true, tracingHeaderFormat, 
listener); - Hashtable properties = abfsStore - .getPathStatus(qualifiedPath, tracingContext); + Hashtable properties; String xAttrName = ensureValidAttributeName(name); + + if (abfsStore.getPrefixMode() == PrefixMode.BLOB) { + properties = abfsStore.getBlobMetadata(qualifiedPath, tracingContext); + if (properties.containsKey(xAttrName)) { + String xAttrValue = properties.get(xAttrName); + value = decodeMetadataAttribute(xAttrValue).getBytes( + StandardCharsets.UTF_8); + } + return value; + } + + properties = abfsStore.getPathStatus(qualifiedPath, tracingContext); + if (properties.containsKey(xAttrName)) { String xAttrValue = properties.get(xAttrName); value = abfsStore.encodeAttribute(xAttrValue); @@ -2022,4 +2066,5 @@ public boolean hasPathCapability(final Path path, final String capability) public IOStatistics getIOStatistics() { return abfsCounters != null ? abfsCounters.getIOStatistics() : null; } + } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java index 7816391c8dccd4..da3d2d6933e42f 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java @@ -61,10 +61,12 @@ import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidConfigurationValueException; import org.apache.hadoop.fs.azurebfs.enums.BlobCopyProgress; import org.apache.hadoop.fs.azurebfs.services.AbfsBlobLease; +import org.apache.hadoop.fs.azurebfs.services.AbfsDfsLease; import org.apache.hadoop.fs.azurebfs.services.ListBlobConsumer; import org.apache.hadoop.fs.azurebfs.services.ListBlobProducer; import org.apache.hadoop.fs.azurebfs.services.ListBlobQueue; import org.apache.hadoop.fs.azurebfs.services.OperativeEndpoint; +import 
org.apache.hadoop.fs.azurebfs.services.AbfsHttpHeader; import org.apache.hadoop.fs.azurebfs.services.PrefixMode; import org.apache.hadoop.fs.azurebfs.services.BlobList; import org.apache.hadoop.fs.azurebfs.services.BlobProperty; @@ -146,9 +148,12 @@ import org.apache.hadoop.util.concurrent.HadoopExecutors; import org.apache.http.client.utils.URIBuilder; -import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.FORWARD_SLASH; -import static org.apache.hadoop.fs.azurebfs.services.RenameAtomicityUtils.SUFFIX; import static java.net.HttpURLConnection.HTTP_CONFLICT; +import static java.net.HttpURLConnection.HTTP_NOT_FOUND; + +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.BLOB_LEASE_ONE_MINUTE_DURATION; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.FORWARD_SLASH; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_METADATA_PREFIX; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.CHAR_EQUALS; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.CHAR_FORWARD_SLASH; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.CHAR_HYPHEN; @@ -176,6 +181,7 @@ import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.COPY_BLOB_ABORTED; import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.COPY_BLOB_FAILED; import static org.apache.hadoop.fs.azurebfs.services.AbfsErrors.PATH_EXISTS; +import static org.apache.hadoop.fs.azurebfs.services.RenameAtomicityUtils.SUFFIX; /** * Provides the bridging logic between Hadoop's abstract filesystem and Azure Storage. @@ -206,8 +212,6 @@ public class AzureBlobFileSystemStore implements Closeable, ListingSupport { private final AbfsCounters abfsCounters; private PrefixMode prefixMode; - private final ExecutorService renameBlobExecutorService; - /** * The set of directories where we should store files as append blobs. 
*/ @@ -298,14 +302,6 @@ public AzureBlobFileSystemStore( abfsConfiguration.getMaxWriteRequestsToQueue(), 10L, TimeUnit.SECONDS, "abfs-bounded"); - if (abfsConfiguration.getBlobDirRenameMaxThread() == 0) { - renameBlobExecutorService = Executors.newFixedThreadPool( - Runtime.getRuntime() - .availableProcessors()); - } else { - renameBlobExecutorService = Executors.newFixedThreadPool( - abfsConfiguration.getBlobDirRenameMaxThread()); - } } /** @@ -686,18 +682,130 @@ BlobProperty getBlobProperty(Path blobPath, * @throws AzureBlobFileSystemException exception thrown from * {@link AbfsClient#getBlobProperty(Path, TracingContext)} call */ - BlobProperty getContainerProperty(TracingContext tracingContext) throws AzureBlobFileSystemException { - AbfsRestOperation op = client.getContainerProperty(tracingContext); - BlobProperty blobProperty = new BlobProperty(); + BlobProperty getContainerProperty(TracingContext tracingContext) + throws AzureBlobFileSystemException { + try (AbfsPerfInfo perfInfo = startTracking("getContainerProperty", "getContainerProperty")) { + LOG.debug("getContainerProperty for filesystem: {}", + client.getFileSystem()); - final AbfsHttpOperation opResult = op.getResult(); + AbfsRestOperation op = client.getContainerProperty(tracingContext); + perfInfo.registerResult(op.getResult()).registerSuccess(true); - blobProperty.setIsDirectory(true); - blobProperty.setPath(new Path("/")); + BlobProperty blobProperty = new BlobProperty(); + blobProperty.setIsDirectory(true); + blobProperty.setPath(new Path(FORWARD_SLASH)); - return blobProperty; + return blobProperty; + } + } + + /** + * Gets user-defined properties(metadata) of the blob over blob endpoint.
+ * @param path + * @param tracingContext + * @return hashmap containing key value pairs for blob metadata + * @throws AzureBlobFileSystemException + */ + public Hashtable getBlobMetadata(final Path path, + TracingContext tracingContext) throws AzureBlobFileSystemException { + try (AbfsPerfInfo perfInfo = startTracking("getBlobMetadata", "getBlobMetadata")) { + LOG.debug("getBlobMetadata for filesystem: {} path: {}", + client.getFileSystem(), + path); + + final AbfsRestOperation op = client.getBlobMetadata(path, tracingContext); + perfInfo.registerResult(op.getResult()).registerSuccess(true); + + final Hashtable metadata = parseResponseHeadersToHashTable(op.getResult()); + return metadata; + } + catch (AbfsRestOperationException ex) { + // The path does not exist explicitly. + // Check here if the path is an implicit dir + if (ex.getStatusCode() == HttpURLConnection.HTTP_NOT_FOUND && !path.isRoot()) { + List blobProperties = getListBlobs(path, null, + tracingContext, 2, true); + if (blobProperties.size() == 0) { + throw ex; + } + else { + // Path exists as implicit directory. + // Return empty hashmap for properties + return new Hashtable<>(); + } + } + else { + throw ex; + } + } + } + + /** + * Sets user-defined properties(metadata) of the blob over blob endpoint. 
+ * @param path on which metadata is to be set + * @param metadata set of user-defined properties to be set + * @param tracingContext + * @throws AzureBlobFileSystemException + */ + public void setBlobMetadata(final Path path, + final Hashtable metadata, TracingContext tracingContext) + throws AzureBlobFileSystemException { + try (AbfsPerfInfo perfInfo = startTracking("setBlobMetadata", "setBlobMetadata")) { + LOG.debug("setBlobMetadata for filesystem: {} path: {} with properties: {}", + client.getFileSystem(), + path, + metadata); + + final List metadataRequestHeaders = getRequestHeadersForMetadata(metadata); + final AbfsRestOperation op = client.setBlobMetadata( + path, metadataRequestHeaders, tracingContext); + + perfInfo.registerResult(op.getResult()).registerSuccess(true); + } + } + + /** + * User-defined properties over blob endpoint are actually response headers + * with prefix "x-ms-meta-". Each property is a different response header. + * This parses all the headers, removes the prefix and creates a Hashtable. + * @param result AbfsHttpOperation result containing response headers. + * @return Hashtable holding the user-defined metadata. + */ + private Hashtable parseResponseHeadersToHashTable( + AbfsHttpOperation result) { + final Hashtable metadata = new Hashtable<>(); + String name, value; + + final Map> responseHeaders = result.getResponseHeaders(); + for (Map.Entry> entry : responseHeaders.entrySet()) { + name = entry.getKey(); + + if (name != null && name.startsWith(X_MS_METADATA_PREFIX)) { + value = entry.getValue().get(0); + metadata.put(name.substring(X_MS_METADATA_PREFIX.length()), value); + } + } + return metadata; + } + + /** + * User-defined properties over blob endpoint are required to be set + * as request headers with prefix "x-ms-meta-". Each property needs to be made + * into a different request header. This parses all the properties, adds the prefix + * and creates request headers.
+ * @param metadata Hashmap + * @return List of request headers to be passed with API call. + */ + private List getRequestHeadersForMetadata(Hashtable metadata) { + final List headers = new ArrayList(); + + for(Map.Entry entry : metadata.entrySet()) { + headers.add(new AbfsHttpHeader(X_MS_METADATA_PREFIX + entry.getKey(), entry.getValue())); + } + return headers; } + /** * Get the list of a blob on a give path, or blob starting with the given prefix. * @@ -926,7 +1034,11 @@ private AbfsRestOperation conditionalCreateOverwriteFile(final String relativePa if (e.getStatusCode() == HTTP_CONFLICT) { // File pre-exists, fetch eTag try { - op = client.getPathStatus(relativePath, false, tracingContext); + if (getPrefixMode() == PrefixMode.BLOB) { + op = client.getBlobProperty(new Path(relativePath), tracingContext); + } else { + op = client.getPathStatus(relativePath, false, tracingContext); + } } catch (AbfsRestOperationException ex) { if (ex.getStatusCode() == HttpURLConnection.HTTP_NOT_FOUND) { // Is a parallel access case, as file which was found to be @@ -997,6 +1109,7 @@ private AbfsOutputStreamContext populateAbfsOutputStreamContext( } return new AbfsOutputStreamContext(abfsConfiguration.getSasTokenRenewPeriodForStreamsInSeconds()) .withWriteBufferSize(bufferSize) + .enableExpectHeader(abfsConfiguration.isExpectHeaderEnabled()) .enableFlush(abfsConfiguration.isFlushEnabled()) .enableSmallWriteOptimization(abfsConfiguration.isSmallWriteOptimizationEnabled()) .disableOutputStreamFlush(abfsConfiguration.isOutputStreamFlushDisabled()) @@ -1125,15 +1238,45 @@ public AbfsInputStream openFileForRead(final Path path, String relativePath = getRelativePath(path); - final AbfsRestOperation op = client - .getPathStatus(relativePath, false, tracingContext); + AbfsRestOperation op; + if (getPrefixMode() == PrefixMode.BLOB) { + try { + op = client.getBlobProperty(new Path(relativePath), tracingContext); + } catch (AbfsRestOperationException e) { + if (e.getStatusCode() != 
HTTP_NOT_FOUND) { + throw e; + } + List blobsList = getListBlobs(new Path(relativePath), null, + tracingContext, 2, true); + if (blobsList.size() > 0) { + throw new AbfsRestOperationException( + AzureServiceErrorCode.PATH_NOT_FOUND.getStatusCode(), + AzureServiceErrorCode.PATH_NOT_FOUND.getErrorCode(), + "openFileForRead must be used with files and not directories", + null); + } else { + throw e; + } + } + } else { + op = client + .getPathStatus(relativePath, false, tracingContext); + } + perfInfo.registerResult(op.getResult()); - final String resourceType = op.getResult().getResponseHeader(HttpHeaderConfigurations.X_MS_RESOURCE_TYPE); + boolean isDirectory; + if (getPrefixMode() == PrefixMode.BLOB) { + isDirectory = Boolean.parseBoolean(op.getResult().getResponseHeader(X_MS_META_HDI_ISFOLDER)); + } else { + final String resourceType = op.getResult().getResponseHeader(HttpHeaderConfigurations.X_MS_RESOURCE_TYPE); + isDirectory = parseIsDirectory(resourceType); + } + final long contentLength = Long.parseLong(op.getResult().getResponseHeader(HttpHeaderConfigurations.CONTENT_LENGTH)); final String eTag = op.getResult().getResponseHeader(HttpHeaderConfigurations.ETAG); - if (parseIsDirectory(resourceType)) { + if (isDirectory) { throw new AbfsRestOperationException( AzureServiceErrorCode.PATH_NOT_FOUND.getStatusCode(), AzureServiceErrorCode.PATH_NOT_FOUND.getErrorCode(), @@ -1183,14 +1326,44 @@ public OutputStream openFileForWrite(final Path path, String relativePath = getRelativePath(path); - final AbfsRestOperation op = client - .getPathStatus(relativePath, false, tracingContext); + final AbfsRestOperation op; + try { + if (getPrefixMode() == PrefixMode.BLOB) { + op = client.getBlobProperty(path, tracingContext); + } else { + op = client.getPathStatus(relativePath, false, tracingContext); + } + } catch (AbfsRestOperationException ex) { + // The path does not exist explicitly. 
+ // Check here if the path is an implicit dir + if (getPrefixMode() == PrefixMode.BLOB && ex.getStatusCode() == HttpURLConnection.HTTP_NOT_FOUND) { + List blobProperties = getListBlobs(path, null, + tracingContext, 2, true); + if (blobProperties.size() != 0) { + throw new AbfsRestOperationException( + AzureServiceErrorCode.PATH_NOT_FOUND.getStatusCode(), + AzureServiceErrorCode.PATH_NOT_FOUND.getErrorCode(), + "openFileForWrite must be used with files and not directories", + null); + } else { + throw ex; + } + } else { + throw ex; + } + } perfInfo.registerResult(op.getResult()); final String resourceType = op.getResult().getResponseHeader(HttpHeaderConfigurations.X_MS_RESOURCE_TYPE); final Long contentLength = Long.valueOf(op.getResult().getResponseHeader(HttpHeaderConfigurations.CONTENT_LENGTH)); - if (parseIsDirectory(resourceType)) { + boolean isDirectory; + if (getPrefixMode() == PrefixMode.BLOB) { + isDirectory = op.getResult().getResponseHeader(X_MS_META_HDI_ISFOLDER) != null; + } else { + isDirectory = parseIsDirectory(resourceType); + } + if (isDirectory) { throw new AbfsRestOperationException( AzureServiceErrorCode.PATH_NOT_FOUND.getStatusCode(), AzureServiceErrorCode.PATH_NOT_FOUND.getErrorCode(), @@ -1275,7 +1448,10 @@ public void rename(final Path source, final Path destination, String nextMarker = blobList.getNextMarker(); List srcBlobProperties = blobList.getBlobPropertyList(); - ListBlobQueue listBlobQueue = new ListBlobQueue(blobList); + ListBlobQueue listBlobQueue = new ListBlobQueue( + blobList.getBlobPropertyList(), + getAbfsConfiguration().getProducerQueueMaxSize(), + getAbfsConfiguration().getBlobDirRenameMaxThread()); if (nextMarker != null) { new ListBlobProducer(listSrc, client, listBlobQueue, nextMarker, tracingContext); @@ -1365,7 +1541,7 @@ public void rename(final Path source, final Path destination, if (isAtomicRenameKey(source.toUri().getPath())) { LOG.debug("source dir {} is an atomicRenameKey", source.toUri().getPath()); - 
srcDirLease = new AbfsBlobLease(client, source.toUri().getPath(), tracingContext); + srcDirLease = new AbfsBlobLease(client, source.toUri().getPath(), BLOB_LEASE_ONE_MINUTE_DURATION, tracingContext); renameAtomicityUtils.preRename(srcBlobProperties, isCreateOperationOnBlobEndpoint()); isAtomicRename = true; } else { @@ -1385,7 +1561,8 @@ public void rename(final Path source, final Path destination, LOG.debug("source {} is not directory", source); String leaseId = null; if (isAtomicRenameKey(source.toUri().getPath())) { - leaseId = new AbfsBlobLease(client, source.toUri().getPath(), tracingContext).getLeaseId(); + leaseId = new AbfsBlobLease(client, source.toUri().getPath(), + BLOB_LEASE_ONE_MINUTE_DURATION, tracingContext).getLeaseID(); } renameBlob(blobPropOnSrc.getPath(), destination, leaseId, tracingContext ); @@ -1434,15 +1611,18 @@ private void renameBlobDir(final Path source, final ListBlobQueue listBlobQueue, final BlobProperty blobPropOnSrc, final AbfsBlobLease srcDirBlobLease, final Boolean isAtomicRename) throws AzureBlobFileSystemException { - BlobList blobList; + List blobList; ListBlobConsumer listBlobConsumer = new ListBlobConsumer(listBlobQueue); + final ExecutorService renameBlobExecutorService + = Executors.newFixedThreadPool( + getAbfsConfiguration().getBlobDirRenameMaxThread()); while(!listBlobConsumer.isCompleted()) { blobList = listBlobConsumer.consume(); if(blobList == null) { continue; } List futures = new ArrayList<>(); - for (BlobProperty blobProperty : blobList.getBlobPropertyList()) { + for (BlobProperty blobProperty : blobList) { futures.add(renameBlobExecutorService.submit(() -> { try { AbfsBlobLease blobLease = null; @@ -1459,16 +1639,13 @@ private void renameBlobDir(final Path source, * on a log file, to gain exclusive access to it, before it splits it. 
*/ blobLease = new AbfsBlobLease(client, - blobProperty.getPath().toUri().getPath(), tracingContext); - } - if(srcDirBlobLease != null) { - srcDirBlobLease.renewIfRequired(); + blobProperty.getPath().toUri().getPath(), BLOB_LEASE_ONE_MINUTE_DURATION, tracingContext); } renameBlob( blobProperty.getPath(), createDestinationPathForBlobPartOfRenameSrcDir(destination, blobProperty, source), - blobLease != null ? blobLease.getLeaseId() : null, + blobLease != null ? blobLease.getLeaseID() : null, tracingContext); } catch (AzureBlobFileSystemException e) { LOG.error(String.format("rename from %s to %s for blob %s failed", @@ -1487,11 +1664,12 @@ private void renameBlobDir(final Path source, } } } + renameBlobExecutorService.shutdown(); renameBlob( blobPropOnSrc.getPath(), createDestinationPathForBlobPartOfRenameSrcDir(destination, blobPropOnSrc, source), - srcDirBlobLease != null ? srcDirBlobLease.getLeaseId() : null, + srcDirBlobLease != null ? srcDirBlobLease.getLeaseID() : null, tracingContext); } @@ -1651,6 +1829,98 @@ public FileStatus getFileStatus(final Path path, } } + public FileStatus getFileStatusOverBlob(final Path path, + TracingContext tracingContext) throws IOException { + try (AbfsPerfInfo perfInfo = startTracking("getFileStatus", "undetermined")) { + LOG.debug("getFileStatus filesystem call over blob endpoint: {} path: {}", + client.getFileSystem(), + path); + + final AbfsRestOperation op; + + // Try to getBlobProperty for explicit blobs + if (path.isRoot()) { + perfInfo.registerCallee("getContainerProperties"); + op = client.getContainerProperty(tracingContext); + } else { + perfInfo.registerCallee("getBlobProperty"); + op = client.getBlobProperty(path, tracingContext); + } + + perfInfo.registerResult(op.getResult()); + final long blockSize = abfsConfiguration.getAzureBlockSize(); + final AbfsHttpOperation result = op.getResult(); + + String eTag = extractEtagHeader(result); + final String lastModified = 
result.getResponseHeader(HttpHeaderConfigurations.LAST_MODIFIED); + final long contentLength; + final boolean resourceIsDir; + + if (path.isRoot()) { + contentLength = 0; + resourceIsDir = true; + } else { + contentLength = parseContentLength(result.getResponseHeader( + HttpHeaderConfigurations.CONTENT_LENGTH)); + resourceIsDir = result.getResponseHeader( + X_MS_META_HDI_ISFOLDER) != null; + } + + final String transformedOwner = identityTransformer.transformIdentityForGetRequest( + result.getResponseHeader(HttpHeaderConfigurations.X_MS_OWNER), + true, + userName); + + final String transformedGroup = identityTransformer.transformIdentityForGetRequest( + result.getResponseHeader(HttpHeaderConfigurations.X_MS_GROUP), + false, + primaryUserGroup); + + perfInfo.registerSuccess(true); + + return new VersionedFileStatus( + transformedOwner, + transformedGroup, + new AbfsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL), + false, + contentLength, + resourceIsDir, + 1, + blockSize, + DateTimeUtils.parseLastModifiedTime(lastModified), + path, + eTag); + } + catch (AbfsRestOperationException ex) { + // The path does not exist explicitly. + // Check here if the path is an implicit dir + if (ex.getStatusCode() == HttpURLConnection.HTTP_NOT_FOUND && !path.isRoot()) { + List blobProperties = getListBlobs(path,null, tracingContext, 2, true); + if (blobProperties.size() == 0) { + throw ex; + } + else { + // TODO: return properties of first child blob here like in wasb after listFileStatus is implemented over blob + return new VersionedFileStatus( + userName, + primaryUserGroup, + new AbfsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL), + false, + 0L, + true, + 1, + abfsConfiguration.getAzureBlockSize(), + DateTimeUtils.parseLastModifiedTime(null), + path, + null); + } + } + else { + throw ex; + } + } + } + /** * @param path The list path. 
* @param tracingContext Tracks identifiers for request header @@ -2170,7 +2440,9 @@ RenameAtomicityUtils.RedoRenameInvocation getRedoRenameInvocation(final TracingC public void redo(final Path destination, final Path src) throws AzureBlobFileSystemException { - ListBlobQueue listBlobQueue = new ListBlobQueue(); + ListBlobQueue listBlobQueue = new ListBlobQueue( + getAbfsConfiguration().getProducerQueueMaxSize(), + getAbfsConfiguration().getBlobDirRenameMaxThread()); StringBuilder listSrcBuilder = new StringBuilder(src.toUri().getPath()); if (!src.isRoot()) { listSrcBuilder.append(FORWARD_SLASH); @@ -2180,7 +2452,7 @@ public void redo(final Path destination, final Path src) tracingContext); BlobProperty srcBlobProperty = getBlobProperty(src, tracingContext); AbfsBlobLease abfsBlobLease = new AbfsBlobLease(client, - src.toUri().getPath(), tracingContext); + src.toUri().getPath(), BLOB_LEASE_ONE_MINUTE_DURATION, tracingContext); renameBlobDir(src, destination, tracingContext, listBlobQueue, srcBlobProperty, abfsBlobLease, true); } @@ -2618,7 +2890,12 @@ private AbfsLease maybeCreateLease(String relativePath, TracingContext tracingCo if (!enableInfiniteLease) { return null; } - AbfsLease lease = new AbfsLease(client, relativePath, tracingContext); + final AbfsLease lease; + if (getPrefixMode() == PrefixMode.DFS) { + lease = new AbfsDfsLease(client, relativePath, null, tracingContext); + } else { + lease = new AbfsBlobLease(client, relativePath, null, tracingContext); + } leaseRefs.put(lease, null); return lease; } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java index 32fa802200ac8a..b294ad218cec35 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java +++ 
b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java @@ -74,6 +74,11 @@ public final class AbfsHttpConstants { public static final String HTTP_METHOD_PATCH = "PATCH"; public static final String HTTP_METHOD_POST = "POST"; public static final String HTTP_METHOD_PUT = "PUT"; + /** + * All status codes less than http 100 signify error + * and should qualify for retry. + */ + public static final int HTTP_CONTINUE = 100; // Abfs generic constants public static final String SINGLE_WHITE_SPACE = " "; @@ -120,6 +125,9 @@ public final class AbfsHttpConstants { public static final String DEFAULT_SCOPE = "default:"; public static final String PERMISSION_FORMAT = "%04d"; public static final String SUPER_USER = "$superuser"; + // The HTTP 100 Continue informational status response code indicates that everything so far + // is OK and that the client should continue with the request or ignore it if it is already finished. + public static final String HUNDRED_CONTINUE = "100-continue"; public static final char CHAR_FORWARD_SLASH = '/'; public static final char CHAR_EXCLAMATION_POINT = '!'; @@ -143,6 +151,7 @@ public final class AbfsHttpConstants { public static final String COPY_STATUS_ABORTED = "aborted"; public static final String COPY_STATUS_FAILED = "failed"; public static final String HDI_ISFOLDER = "hdi_isfolder"; + public static final Integer BLOB_LEASE_ONE_MINUTE_DURATION = 60; private AbfsHttpConstants() {} } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java index f48dee5c27310c..23989b9f3444fb 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java @@ -35,9 +35,15 @@ public final class 
ConfigurationKeys { * path to determine HNS status. */ public static final String FS_AZURE_ACCOUNT_IS_HNS_ENABLED = "fs.azure.account.hns.enabled"; + /** + * Enable or disable expect hundred continue header. + * Value: {@value}. + */ + public static final String FS_AZURE_ACCOUNT_IS_EXPECT_HEADER_ENABLED = "fs.azure.account.expect.header.enabled"; public static final String FS_AZURE_ACCOUNT_KEY_PROPERTY_NAME = "fs.azure.account.key"; public static final String FS_AZURE_ACCOUNT_KEY_PROPERTY_NAME_REGX = "fs\\.azure\\.account\\.key\\.(.*)"; public static final String FS_AZURE_SECURE_MODE = "fs.azure.secure.mode"; + public static final String FS_AZURE_ACCOUNT_LEVEL_THROTTLING_ENABLED = "fs.azure.account.throttling.enabled"; // Retry strategy defined by the user public static final String AZURE_MIN_BACKOFF_INTERVAL = "fs.azure.io.retry.min.backoff.interval"; @@ -116,6 +122,8 @@ public final class ConfigurationKeys { public static final String AZURE_CREATE_REMOTE_FILESYSTEM_DURING_INITIALIZATION = "fs.azure.createRemoteFileSystemDuringInitialization"; public static final String AZURE_SKIP_USER_GROUP_METADATA_DURING_INITIALIZATION = "fs.azure.skipUserGroupMetadataDuringInitialization"; public static final String FS_AZURE_ENABLE_AUTOTHROTTLING = "fs.azure.enable.autothrottling"; + public static final String FS_AZURE_ACCOUNT_OPERATION_IDLE_TIMEOUT = "fs.azure.account.operation.idle.timeout"; + public static final String FS_AZURE_ANALYSIS_PERIOD = "fs.azure.analysis.period"; public static final String FS_AZURE_ALWAYS_USE_HTTPS = "fs.azure.always.use.https"; public static final String FS_AZURE_ATOMIC_RENAME_KEY = "fs.azure.atomic.rename.key"; /** This config ensures that during create overwrite an existing file will be @@ -262,10 +270,12 @@ public static String accountProperty(String property, String account) { public static final String FS_AZURE_ENABLE_BLOB_ENDPOINT = "fs.azure.enable.blob.endpoint"; public static final String FS_AZURE_MKDIRS_FALLBACK_TO_DFS = 
"fs.azure.mkdirs.fallback.to.dfs"; public static final String FS_AZURE_INGRESS_FALLBACK_TO_DFS = "fs.azure.ingress.fallback.to.dfs"; + public static final String FS_AZURE_READ_FALLBACK_TO_DFS = "fs.azure.read.fallback.to.dfs"; public static final String FS_AZURE_REDIRECT_DELETE = "fs.azure.redirect.delete"; public static final String FS_AZURE_REDIRECT_RENAME = "fs.azure.redirect.rename"; - public static final String FS_AZURE_MAX_CONSUMER_LAG = "fs.azure.max.consumer.lag"; + public static final String FS_AZURE_PRODUCER_QUEUE_MAX_SIZE = "fs.azure.producer.queue.max.size"; + public static final String FS_AZURE_LEASE_CREATE_NON_RECURSIVE = "fs.azure.lease.create.non.recursive"; private ConfigurationKeys() {} } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java index 23aefcafb8c68a..9efb16ea746100 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java @@ -32,7 +32,7 @@ public final class FileSystemConfigurations { public static final String DEFAULT_FS_AZURE_ACCOUNT_IS_HNS_ENABLED = ""; - + public static final boolean DEFAULT_FS_AZURE_ACCOUNT_IS_EXPECT_HEADER_ENABLED = true; public static final String USER_HOME_DIRECTORY_PREFIX = "/user"; private static final int SIXTY_SECONDS = 60 * 1000; @@ -98,6 +98,9 @@ public final class FileSystemConfigurations { public static final boolean DEFAULT_ENABLE_FLUSH = true; public static final boolean DEFAULT_DISABLE_OUTPUTSTREAM_FLUSH = true; public static final boolean DEFAULT_ENABLE_AUTOTHROTTLING = true; + public static final boolean DEFAULT_FS_AZURE_ACCOUNT_LEVEL_THROTTLING_ENABLED = true; + public static final int DEFAULT_ACCOUNT_OPERATION_IDLE_TIMEOUT_MS = 60_000; + 
public static final int DEFAULT_ANALYSIS_PERIOD_MS = 10_000; public static final DelegatingSSLSocketFactory.SSLChannelMode DEFAULT_FS_AZURE_SSL_CHANNEL_MODE = DelegatingSSLSocketFactory.SSLChannelMode.Default; @@ -123,11 +126,15 @@ public final class FileSystemConfigurations { public static final boolean DEFAULT_FS_AZURE_ENABLE_BLOBENDPOINT = false; public static final boolean DEFAULT_FS_AZURE_MKDIRS_FALLBACK_TO_DFS = false; public static final boolean DEFAULT_FS_AZURE_INGRESS_FALLBACK_TO_DFS = false; + public static final boolean DEFAULT_AZURE_READ_FALLBACK_TO_DFS = false; // To have functionality similar to drop1 delete is going to wasb by default for now. public static final boolean DEFAULT_FS_AZURE_REDIRECT_RENAME = false; public static final boolean DEFAULT_FS_AZURE_REDIRECT_DELETE = true; - public static final int DEFAULT_FS_AZURE_MAX_CONSUMER_LAG = 7000; + public static final int DEFAULT_FS_AZURE_PRODUCER_QUEUE_MAX_SIZE = 10000; + public static final boolean DEFAULT_FS_AZURE_LEASE_CREATE_NON_RECURSIVE = false; + + public static final int DEFAULT_FS_AZURE_BLOB_RENAME_THREAD = 5; /** * Limit of queued block upload operations before writes diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/HttpHeaderConfigurations.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/HttpHeaderConfigurations.java index d40d89f1a0d51e..cd3c321b4395d0 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/HttpHeaderConfigurations.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/HttpHeaderConfigurations.java @@ -77,6 +77,8 @@ public final class HttpHeaderConfigurations { public static final String X_MS_COPY_SOURCE = "x-ms-copy-source"; public static final String X_MS_COPY_STATUS_DESCRIPTION = "x-ms-copy-status-description"; public static final String X_MS_COPY_STATUS = "x-ms-copy-status"; + public static final String EXPECT = 
"Expect"; + public static final String X_MS_METADATA_PREFIX = "x-ms-meta-"; private HttpHeaderConfigurations() {} } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/exceptions/InvalidAbfsRestOperationException.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/exceptions/InvalidAbfsRestOperationException.java index aba1d8c1efa2b3..147cb6d83cb0ba 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/exceptions/InvalidAbfsRestOperationException.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/exceptions/InvalidAbfsRestOperationException.java @@ -29,12 +29,33 @@ @InterfaceAudience.Public @InterfaceStability.Evolving public class InvalidAbfsRestOperationException extends AbfsRestOperationException { + + private static final String ERROR_MESSAGE = "InvalidAbfsRestOperationException"; + public InvalidAbfsRestOperationException( final Exception innerException) { super( AzureServiceErrorCode.UNKNOWN.getStatusCode(), AzureServiceErrorCode.UNKNOWN.getErrorCode(), - "InvalidAbfsRestOperationException", + innerException != null + ? innerException.toString() + : ERROR_MESSAGE, innerException); } + + /** + * Adds the retry count along with the exception. + * @param innerException The inner exception which is originally caught. + * @param retryCount The retry count when the exception was thrown. + */ + public InvalidAbfsRestOperationException( + final Exception innerException, int retryCount) { + super( + AzureServiceErrorCode.UNKNOWN.getStatusCode(), + AzureServiceErrorCode.UNKNOWN.getErrorCode(), + innerException != null + ? 
innerException.toString() + : ERROR_MESSAGE + " RetryCount: " + retryCount, + innerException); + } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/AppendRequestParameters.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/AppendRequestParameters.java index 7369bfaf56422c..57e559a60ec844 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/AppendRequestParameters.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/AppendRequestParameters.java @@ -34,19 +34,22 @@ public enum Mode { private final Mode mode; private final boolean isAppendBlob; private final String leaseId; + private boolean isExpectHeaderEnabled; public AppendRequestParameters(final long position, final int offset, final int length, final Mode mode, final boolean isAppendBlob, - final String leaseId) { + final String leaseId, + final boolean isExpectHeaderEnabled) { this.position = position; this.offset = offset; this.length = length; this.mode = mode; this.isAppendBlob = isAppendBlob; this.leaseId = leaseId; + this.isExpectHeaderEnabled = isExpectHeaderEnabled; } public long getPosition() { @@ -72,4 +75,12 @@ public boolean isAppendBlob() { public String getLeaseId() { return this.leaseId; } + + public boolean isExpectHeaderEnabled() { + return isExpectHeaderEnabled; + } + + public void setExpectHeaderEnabled(boolean expectHeaderEnabled) { + isExpectHeaderEnabled = expectHeaderEnabled; + } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/extensions/SASTokenProvider.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/extensions/SASTokenProvider.java index 22e9d46a75d445..c5f59e96905d82 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/extensions/SASTokenProvider.java +++ 
b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/extensions/SASTokenProvider.java @@ -44,6 +44,7 @@ public interface SASTokenProvider { String GET_PROPERTIES_OPERATION = "get-properties"; String GET_BLOB_PROPERTIES_OPERATION = "get-blob-properties"; String GET_CONTAINER_PROPERTIES_OPERATION = "get-container-properties"; + String GET_BLOB_METADATA_OPERATION = "get-blob-metadata"; String LIST_OPERATION = "list"; String LIST_BLOB_OPERATION = "list-blob"; String COPY_BLOB_DESTINATION = "copy-blob-dst"; @@ -55,6 +56,7 @@ public interface SASTokenProvider { String SET_OWNER_OPERATION = "set-owner"; String SET_PERMISSION_OPERATION = "set-permission"; String SET_PROPERTIES_OPERATION = "set-properties"; + String SET_BLOB_METADATA_OPERATION = "set-blob-metadata"; String WRITE_OPERATION = "write"; String LEASE_OPERATION = "lease"; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobLease.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobLease.java index 40245f36535c91..e254ff98f6444c 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobLease.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsBlobLease.java @@ -23,56 +23,47 @@ import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_LEASE_ID; -public class AbfsBlobLease { - private String leaseId; - private Long leaseRenewLastEpoch; - private final TracingContext tracingContext; - private final AbfsClient client; - private final String path; - private final Integer ONE_MINUTE = 60; - private final Long RENEW_TIME = 30 * 1_000L; - private Boolean freed = false; +public class AbfsBlobLease extends AbfsLease { - public AbfsBlobLease(AbfsClient client, - String path, - TracingContext tracingContext) throws - AzureBlobFileSystemException { - this.client = client; - this.path = path; - this.tracingContext 
= tracingContext; - AbfsRestOperation op = client.acquireBlobLease(path, ONE_MINUTE, - tracingContext); - extractLeaseInfo(op); + public AbfsBlobLease(final AbfsClient client, + final String path, + final Integer leaseDuration, + final TracingContext tracingContext) throws AzureBlobFileSystemException { + super(client, path, leaseDuration, tracingContext); } - private void extractLeaseInfo(final AbfsRestOperation op) { - leaseId = op.getResult().getResponseHeader(X_MS_LEASE_ID); - leaseRenewLastEpoch = System.currentTimeMillis(); + public AbfsBlobLease(final AbfsClient client, + final String path, + final int acquireMaxRetries, + final int acquireRetryInterval, + final Integer leaseDuration, + final TracingContext tracingContext) throws AzureBlobFileSystemException { + super(client, path, acquireMaxRetries, acquireRetryInterval, leaseDuration, + tracingContext); } - public String getLeaseId() { - return leaseId; + @Override + String callRenewLeaseAPI(final String path, + final String leaseId, + final TracingContext tracingContext) throws AzureBlobFileSystemException { + return extractLeaseInfo(client.renewBlobLease(path, leaseId, tracingContext)); } - public void renewIfRequired() throws AzureBlobFileSystemException { - if (System.currentTimeMillis() - leaseRenewLastEpoch >= RENEW_TIME) { - renew(); - } + @Override + AbfsRestOperation callAcquireLeaseAPI(final String path, + final Integer leaseDuration, + final TracingContext tracingContext) throws AzureBlobFileSystemException { + return client.acquireBlobLease(path, leaseDuration, tracingContext); } - private synchronized void renew() throws AzureBlobFileSystemException { - if (System.currentTimeMillis() - leaseRenewLastEpoch < RENEW_TIME) { - return; - } - AbfsRestOperation op = client.renewBlobLease(path, leaseId, tracingContext); - extractLeaseInfo(op); + @Override + void callReleaseLeaseAPI(final String path, + final String leaseID, + final TracingContext tracingContext) throws AzureBlobFileSystemException { + 
client.releaseBlobLease(path, leaseID, tracingContext); } - public synchronized void free() throws AzureBlobFileSystemException { - if (freed) { - return; - } - client.releaseBlobLease(path, leaseId, tracingContext); - freed = true; + private String extractLeaseInfo(final AbfsRestOperation op) { + return op.getResult().getResponseHeader(X_MS_LEASE_ID); } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java index e961e13ac399ff..bb2cca57016185 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java @@ -71,6 +71,7 @@ import org.apache.hadoop.security.ssl.DelegatingSSLSocketFactory; import org.apache.hadoop.util.concurrent.HadoopExecutors; +import static java.net.HttpURLConnection.HTTP_CONFLICT; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.*; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DEFAULT_DELETE_CONSIDERED_IDEMPOTENT; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.SERVER_SIDE_ENCRYPTION_ALGORITHM; @@ -79,6 +80,7 @@ import static org.apache.hadoop.fs.azurebfs.constants.FileSystemUriSchemes.WASB_DNS_PREFIX; import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.*; import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.*; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; /** * AbfsClient. 
@@ -102,6 +104,7 @@ public class AbfsClient implements Closeable { private AccessTokenProvider tokenProvider; private SASTokenProvider sasTokenProvider; private final AbfsCounters abfsCounters; + private final AbfsThrottlingIntercept intercept; private final ListeningScheduledExecutorService executorService; @@ -117,6 +120,7 @@ private AbfsClient(final URL baseUrl, final SharedKeyCredentials sharedKeyCreden this.retryPolicy = abfsClientContext.getExponentialRetryPolicy(); this.accountName = abfsConfiguration.getAccountName().substring(0, abfsConfiguration.getAccountName().indexOf(AbfsHttpConstants.DOT)); this.authType = abfsConfiguration.getAuthType(accountName); + this.intercept = AbfsThrottlingInterceptFactory.getInstance(accountName, abfsConfiguration); String encryptionKey = this.abfsConfiguration .getClientProvidedEncryptionKey(); @@ -222,6 +226,10 @@ SharedKeyCredentials getSharedKeyCredentials() { return sharedKeyCredentials; } + AbfsThrottlingIntercept getIntercept() { + return intercept; + } + List createDefaultHeaders() { final List requestHeaders = new ArrayList(); requestHeaders.add(new AbfsHttpHeader(X_MS_VERSION, xMsVersion)); @@ -420,7 +428,7 @@ public AbfsRestOperation createPath(final String path, final boolean isFile, fin if (!op.hasResult()) { throw ex; } - if (!isFile && op.getResult().getStatusCode() == HttpURLConnection.HTTP_CONFLICT) { + if (!isFile && op.getResult().getStatusCode() == HTTP_CONFLICT) { String existingResource = op.getResult().getResponseHeader(X_MS_EXISTING_RESOURCE_TYPE); if (existingResource != null && existingResource.equals(DIRECTORY)) { @@ -471,7 +479,7 @@ public AbfsRestOperation createPathBlob(final String path, final boolean isFile, if (!op.hasResult()) { throw ex; } - if (!isFile && op.getResult().getStatusCode() == HttpURLConnection.HTTP_CONFLICT) { + if (!isFile && op.getResult().getStatusCode() == HTTP_CONFLICT) { // This ensures that we don't throw ex only for existing directory but if a blob exists we throw 
exception. tracingContext.setFallbackDFSAppend(tracingContext.getFallbackDFSAppend() + "M"); AbfsRestOperation blobProperty = getBlobProperty(new Path(path), tracingContext); @@ -686,6 +694,9 @@ public AbfsRestOperation append(final String path, final byte[] buffer, throws AzureBlobFileSystemException { final List requestHeaders = createDefaultHeaders(); addCustomerProvidedKeyHeaders(requestHeaders); + if (reqParams.isExpectHeaderEnabled()) { + requestHeaders.add(new AbfsHttpHeader(EXPECT, HUNDRED_CONTINUE)); + } // JDK7 does not support PATCH, so to workaround the issue we will use // PUT and specify the real method in the X-Http-Method-Override header. requestHeaders.add(new AbfsHttpHeader(X_HTTP_METHOD_OVERRIDE, @@ -714,19 +725,33 @@ public AbfsRestOperation append(final String path, final byte[] buffer, if (url.toString().contains(WASB_DNS_PREFIX)) { url = changePrefixFromBlobtoDfs(url); } - final AbfsRestOperation op = new AbfsRestOperation( - AbfsRestOperationType.Append, - this, - HTTP_METHOD_PUT, - url, - requestHeaders, - buffer, - reqParams.getoffset(), - reqParams.getLength(), - sasTokenForReuse); + final AbfsRestOperation op = getAbfsRestOperationForAppend(AbfsRestOperationType.Append, + HTTP_METHOD_PUT, + url, + requestHeaders, + buffer, + reqParams.getoffset(), + reqParams.getLength(), + sasTokenForReuse); try { op.execute(tracingContext); } catch (AzureBlobFileSystemException e) { + /* + If the http response code indicates a user error we retry + the same append request with expect header being disabled. + When "100-continue" header is enabled but a non Http 100 response comes, + the response message might not get set correctly by the server. + So, this handling is to avoid breaking of backward compatibility + if someone has taken dependency on the exception message, + which is created using the error string present in the response header. 
+ */ + int responseStatusCode = ((AbfsRestOperationException) e).getStatusCode(); + if (checkUserError(responseStatusCode) && reqParams.isExpectHeaderEnabled()) { + LOG.debug("User error, retrying without 100 continue enabled for the given path {}", path); + reqParams.setExpectHeaderEnabled(false); + return this.append(path, buffer, reqParams, cachedSasToken, + tracingContext); + } // If we have no HTTP response, throw the original exception. if (!op.hasResult()) { throw e; @@ -734,16 +759,15 @@ public AbfsRestOperation append(final String path, final byte[] buffer, if (reqParams.isAppendBlob() && appendSuccessCheckOp(op, path, (reqParams.getPosition() + reqParams.getLength()), tracingContext)) { - final AbfsRestOperation successOp = new AbfsRestOperation( - AbfsRestOperationType.Append, - this, - HTTP_METHOD_PUT, - url, - requestHeaders, - buffer, - reqParams.getoffset(), - reqParams.getLength(), - sasTokenForReuse); + final AbfsRestOperation successOp = getAbfsRestOperationForAppend( + AbfsRestOperationType.Append, + HTTP_METHOD_PUT, + url, + requestHeaders, + buffer, + reqParams.getoffset(), + reqParams.getLength(), + sasTokenForReuse); successOp.hardSetResult(HttpURLConnection.HTTP_OK); return successOp; } @@ -773,7 +797,9 @@ public AbfsRestOperation append(final String blockId, final String path, final b if (reqParams.getLeaseId() != null) { requestHeaders.add(new AbfsHttpHeader(X_MS_LEASE_ID, reqParams.getLeaseId())); } - + if (reqParams.isExpectHeaderEnabled()) { + requestHeaders.add(new AbfsHttpHeader(EXPECT, HUNDRED_CONTINUE)); + } final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); abfsUriQueryBuilder.addQuery(QUERY_PARAM_COMP, BLOCK); abfsUriQueryBuilder.addQuery(QUERY_PARAM_BLOCKID, blockId); @@ -795,7 +821,29 @@ public AbfsRestOperation append(final String blockId, final String path, final b reqParams.getoffset(), reqParams.getLength(), sasTokenForReuse); - op.execute(tracingContext); + try { + op.execute(tracingContext); + 
} catch (AzureBlobFileSystemException e) { + /* + If the http response code indicates a user error we retry + the same append request with expect header being disabled. + When "100-continue" header is enabled but a non Http 100 response comes, + the response message might not get set correctly by the server. + So, this handling is to avoid breaking of backward compatibility + if someone has taken dependency on the exception message, + which is created using the error string present in the response header. + */ + int responseStatusCode = ((AbfsRestOperationException) e).getStatusCode(); + if (checkUserErrorBlob(responseStatusCode) && reqParams.isExpectHeaderEnabled()) { + LOG.debug("User error, retrying without 100 continue enabled for the given path {}", path); + reqParams.setExpectHeaderEnabled(false); + return this.append(blockId, path, buffer, reqParams, cachedSasToken, + tracingContext, eTag); + } + else { + throw e; + } + } return op; } @@ -843,6 +891,61 @@ public AbfsRestOperation flush(byte[] buffer, final String path, boolean isClose return op; } + /* + * Returns the rest operation for append. + * @param operationType The AbfsRestOperationType. + * @param httpMethod specifies the httpMethod. + * @param url specifies the url. + * @param requestHeaders This includes the list of request headers. + * @param buffer The buffer to write into. + * @param bufferOffset The buffer offset. + * @param bufferLength The buffer Length. + * @param sasTokenForReuse The sasToken. + * @return AbfsRestOperation op. 
+ */ + @VisibleForTesting + AbfsRestOperation getAbfsRestOperationForAppend(final AbfsRestOperationType operationType, + final String httpMethod, + final URL url, + final List requestHeaders, + final byte[] buffer, + final int bufferOffset, + final int bufferLength, + final String sasTokenForReuse) { + return new AbfsRestOperation( + operationType, + this, + httpMethod, + url, + requestHeaders, + buffer, + bufferOffset, + bufferLength, sasTokenForReuse); + } + + /** + * Returns true if the status code lies in the range of user error. + * @param responseStatusCode http response status code. + * @return True or False. + */ + private boolean checkUserError(int responseStatusCode) { + return (responseStatusCode >= HttpURLConnection.HTTP_BAD_REQUEST + && responseStatusCode < HttpURLConnection.HTTP_INTERNAL_ERROR); + } + + /** + * Returns true if the status code lies in the range of user error. + * In the case of HTTP_CONFLICT for PutBlockList we fallback to DFS and hence + * this retry handling is not needed. + * @param responseStatusCode http response status code. + * @return True or False. + */ + private boolean checkUserErrorBlob(int responseStatusCode) { + return (responseStatusCode >= HttpURLConnection.HTTP_BAD_REQUEST + && responseStatusCode < HttpURLConnection.HTTP_INTERNAL_ERROR + && responseStatusCode != HttpURLConnection.HTTP_CONFLICT); + } + // For AppendBlob its possible that the append succeeded in the backend but the request failed. // However a retry would fail with an InvalidQueryParameterValue // (as the current offset would be unacceptable). 
@@ -1007,11 +1110,18 @@ public AbfsRestOperation read(final String path, final long position, final byte abfsUriQueryBuilder, cachedSasToken); URL url = createRequestUrl(path, abfsUriQueryBuilder.toString()); - if (url.toString().contains(WASB_DNS_PREFIX)) { - url = changePrefixFromBlobtoDfs(url); + final AbfsRestOperationType opType; + if (!OperativeEndpoint.isReadEnabledOnDFS( + getAbfsConfiguration().getPrefixMode(), getAbfsConfiguration())) { + opType = AbfsRestOperationType.GetBlob; + } else { + if (url.toString().contains(WASB_DNS_PREFIX)) { + url = changePrefixFromBlobtoDfs(url); + } + opType = AbfsRestOperationType.ReadFile; } final AbfsRestOperation op = new AbfsRestOperation( - AbfsRestOperationType.ReadFile, + opType, this, HTTP_METHOD_GET, url, @@ -1341,6 +1451,9 @@ public AbfsRestOperation getContainerProperty(TracingContext tracingContext) thr final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); abfsUriQueryBuilder.addQuery(QUERY_PARAM_RESTYPE, CONTAINER); + appendSASTokenToQuery("", + SASTokenProvider.GET_CONTAINER_PROPERTIES_OPERATION, abfsUriQueryBuilder); + final URL url = createRequestUrl(abfsUriQueryBuilder.toString()); final AbfsRestOperation op = new AbfsRestOperation( @@ -1353,6 +1466,71 @@ public AbfsRestOperation getContainerProperty(TracingContext tracingContext) thr return op; } + /** + * Gets user-defined properties(metadata) of the blob over blob endpoint. 
+ * @param blobPath + * @param tracingContext + * @return the user-defined properties on blob path + * @throws AzureBlobFileSystemException + */ + public AbfsRestOperation getBlobMetadata(Path blobPath, + TracingContext tracingContext) throws AzureBlobFileSystemException { + final List requestHeaders = createDefaultHeaders(); + AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); + abfsUriQueryBuilder.addQuery(QUERY_PARAM_COMP, QUERY_PARAM_INCLUDE_VALUE_METADATA); + + String blobRelativePath = blobPath.toUri().getPath(); + appendSASTokenToQuery(blobRelativePath, + SASTokenProvider.GET_BLOB_METADATA_OPERATION, abfsUriQueryBuilder); + + final URL url = createRequestUrl(blobRelativePath, + abfsUriQueryBuilder.toString()); + + final AbfsRestOperation op = new AbfsRestOperation( + AbfsRestOperationType.GetBlobMetadata, + this, + HTTP_METHOD_HEAD, + url, + requestHeaders); + op.execute(tracingContext); + return op; + } + + /** + * Sets user-defined properties(metadata) of the blob over blob endpoint. 
+ * @param blobPath + * @param metadataRequestHeaders + * @param tracingContext + * @throws AzureBlobFileSystemException + */ + public AbfsRestOperation setBlobMetadata(Path blobPath, List metadataRequestHeaders, + TracingContext tracingContext) throws AzureBlobFileSystemException { + // Request Header for this call will also contain metadata headers + final List defaultRequestHeaders = createDefaultHeaders(); + final List requestHeaders = new ArrayList(); + requestHeaders.addAll(defaultRequestHeaders); + requestHeaders.addAll(metadataRequestHeaders); + + AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); + abfsUriQueryBuilder.addQuery(QUERY_PARAM_COMP, QUERY_PARAM_INCLUDE_VALUE_METADATA); + + String blobRelativePath = blobPath.toUri().getPath(); + appendSASTokenToQuery(blobRelativePath, + SASTokenProvider.SET_BLOB_METADATA_OPERATION, abfsUriQueryBuilder); + + final URL url = createRequestUrl(blobRelativePath, + abfsUriQueryBuilder.toString()); + + final AbfsRestOperation op = new AbfsRestOperation( + AbfsRestOperationType.SetBlobMetadata, + this, + HTTP_METHOD_PUT, + url, + requestHeaders); + op.execute(tracingContext); + return op; + } + /** * Call server API BlobList. 
* @@ -1658,4 +1836,9 @@ public void addCallback(ListenableFuture future, FutureCallback callba AbfsConfiguration getAbfsConfiguration() { return abfsConfiguration; } + + @VisibleForTesting + protected AccessTokenProvider getTokenProvider() { + return tokenProvider; + } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientThrottlingAnalyzer.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientThrottlingAnalyzer.java index a55c924dd81524..2060de6f14a97e 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientThrottlingAnalyzer.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientThrottlingAnalyzer.java @@ -20,27 +20,30 @@ import java.util.Timer; import java.util.TimerTask; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicReference; -import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; -import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; +import org.apache.hadoop.classification.VisibleForTesting; +import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; +import org.apache.hadoop.util.Preconditions; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static org.apache.hadoop.util.Time.now; + class AbfsClientThrottlingAnalyzer { private static final Logger LOG = LoggerFactory.getLogger( - AbfsClientThrottlingAnalyzer.class); - private static final int DEFAULT_ANALYSIS_PERIOD_MS = 10 * 1000; + AbfsClientThrottlingAnalyzer.class); private static final int MIN_ANALYSIS_PERIOD_MS = 1000; private static final int MAX_ANALYSIS_PERIOD_MS = 30000; private static final double MIN_ACCEPTABLE_ERROR_PERCENTAGE = .1; private static final double 
MAX_EQUILIBRIUM_ERROR_PERCENTAGE = 1; private static final double RAPID_SLEEP_DECREASE_FACTOR = .75; private static final double RAPID_SLEEP_DECREASE_TRANSITION_PERIOD_MS = 150 - * 1000; + * 1000; private static final double SLEEP_DECREASE_FACTOR = .975; private static final double SLEEP_INCREASE_FACTOR = 1.05; private int analysisPeriodMs; @@ -50,49 +53,86 @@ class AbfsClientThrottlingAnalyzer { private String name = null; private Timer timer = null; private AtomicReference blobMetrics = null; + private AtomicLong lastExecutionTime = null; + private final AtomicBoolean isOperationOnAccountIdle = new AtomicBoolean(false); + private AbfsConfiguration abfsConfiguration = null; + private boolean accountLevelThrottlingEnabled = true; private AbfsClientThrottlingAnalyzer() { // hide default constructor } - /** - * Creates an instance of the AbfsClientThrottlingAnalyzer class with - * the specified name. - * - * @param name a name used to identify this instance. - * @throws IllegalArgumentException if name is null or empty. - */ - AbfsClientThrottlingAnalyzer(String name) throws IllegalArgumentException { - this(name, DEFAULT_ANALYSIS_PERIOD_MS); - } - /** * Creates an instance of the AbfsClientThrottlingAnalyzer class with * the specified name and period. * * @param name A name used to identify this instance. - * @param period The frequency, in milliseconds, at which metrics are - * analyzed. + * @param abfsConfiguration The configuration set. * @throws IllegalArgumentException If name is null or empty. * If period is less than 1000 or greater than 30000 milliseconds. 
*/ - AbfsClientThrottlingAnalyzer(String name, int period) - throws IllegalArgumentException { + AbfsClientThrottlingAnalyzer(String name, AbfsConfiguration abfsConfiguration) + throws IllegalArgumentException { Preconditions.checkArgument( - StringUtils.isNotEmpty(name), - "The argument 'name' cannot be null or empty."); + StringUtils.isNotEmpty(name), + "The argument 'name' cannot be null or empty."); + int period = abfsConfiguration.getAnalysisPeriod(); Preconditions.checkArgument( - period >= MIN_ANALYSIS_PERIOD_MS && period <= MAX_ANALYSIS_PERIOD_MS, - "The argument 'period' must be between 1000 and 30000."); + period >= MIN_ANALYSIS_PERIOD_MS && period <= MAX_ANALYSIS_PERIOD_MS, + "The argument 'period' must be between 1000 and 30000."); this.name = name; - this.analysisPeriodMs = period; + this.abfsConfiguration = abfsConfiguration; + this.accountLevelThrottlingEnabled = abfsConfiguration.accountThrottlingEnabled(); + this.analysisPeriodMs = abfsConfiguration.getAnalysisPeriod(); + this.lastExecutionTime = new AtomicLong(now()); this.blobMetrics = new AtomicReference( - new AbfsOperationMetrics(System.currentTimeMillis())); + new AbfsOperationMetrics(System.currentTimeMillis())); this.timer = new Timer( - String.format("abfs-timer-client-throttling-analyzer-%s", name), true); + String.format("abfs-timer-client-throttling-analyzer-%s", name), true); this.timer.schedule(new TimerTaskImpl(), - analysisPeriodMs, - analysisPeriodMs); + analysisPeriodMs, + analysisPeriodMs); + } + + /** + * Resumes the timer if it was stopped. + */ + private void resumeTimer() { + blobMetrics = new AtomicReference( + new AbfsOperationMetrics(System.currentTimeMillis())); + timer.schedule(new TimerTaskImpl(), + analysisPeriodMs, + analysisPeriodMs); + isOperationOnAccountIdle.set(false); + } + + /** + * Synchronized method to suspend or resume timer. + * @param timerFunctionality resume or suspend. + * @param timerTask The timertask object. + * @return true or false. 
+ */ + private synchronized boolean timerOrchestrator(TimerFunctionality timerFunctionality, + TimerTask timerTask) { + switch (timerFunctionality) { + case RESUME: + if (isOperationOnAccountIdle.get()) { + resumeTimer(); + } + break; + case SUSPEND: + if (accountLevelThrottlingEnabled && (System.currentTimeMillis() + - lastExecutionTime.get() >= getOperationIdleTimeout())) { + isOperationOnAccountIdle.set(true); + timerTask.cancel(); + timer.purge(); + return true; + } + break; + default: + break; + } + return false; } /** @@ -104,12 +144,13 @@ private AbfsClientThrottlingAnalyzer() { public void addBytesTransferred(long count, boolean isFailedOperation) { AbfsOperationMetrics metrics = blobMetrics.get(); if (isFailedOperation) { - metrics.bytesFailed.addAndGet(count); - metrics.operationsFailed.incrementAndGet(); + metrics.addBytesFailed(count); + metrics.incrementOperationsFailed(); } else { - metrics.bytesSuccessful.addAndGet(count); - metrics.operationsSuccessful.incrementAndGet(); + metrics.addBytesSuccessful(count); + metrics.incrementOperationsSuccessful(); } + blobMetrics.set(metrics); } /** @@ -117,6 +158,8 @@ public void addBytesTransferred(long count, boolean isFailedOperation) { * @return true if Thread sleeps(Throttling occurs) else false. 
*/ public boolean suspendIfNecessary() { + lastExecutionTime.set(now()); + timerOrchestrator(TimerFunctionality.RESUME, null); int duration = sleepDuration; if (duration > 0) { try { @@ -134,19 +177,27 @@ int getSleepDuration() { return sleepDuration; } + int getOperationIdleTimeout() { + return abfsConfiguration.getAccountOperationIdleTimeout(); + } + + AtomicBoolean getIsOperationOnAccountIdle() { + return isOperationOnAccountIdle; + } + private int analyzeMetricsAndUpdateSleepDuration(AbfsOperationMetrics metrics, int sleepDuration) { final double percentageConversionFactor = 100; - double bytesFailed = metrics.bytesFailed.get(); - double bytesSuccessful = metrics.bytesSuccessful.get(); - double operationsFailed = metrics.operationsFailed.get(); - double operationsSuccessful = metrics.operationsSuccessful.get(); + double bytesFailed = metrics.getBytesFailed().get(); + double bytesSuccessful = metrics.getBytesSuccessful().get(); + double operationsFailed = metrics.getOperationsFailed().get(); + double operationsSuccessful = metrics.getOperationsSuccessful().get(); double errorPercentage = (bytesFailed <= 0) - ? 0 - : (percentageConversionFactor - * bytesFailed - / (bytesFailed + bytesSuccessful)); - long periodMs = metrics.endTime - metrics.startTime; + ? 0 + : (percentageConversionFactor + * bytesFailed + / (bytesFailed + bytesSuccessful)); + long periodMs = metrics.getEndTime() - metrics.getStartTime(); double newSleepDuration; @@ -154,10 +205,10 @@ private int analyzeMetricsAndUpdateSleepDuration(AbfsOperationMetrics metrics, ++consecutiveNoErrorCount; // Decrease sleepDuration in order to increase throughput. double reductionFactor = - (consecutiveNoErrorCount * analysisPeriodMs - >= RAPID_SLEEP_DECREASE_TRANSITION_PERIOD_MS) - ? RAPID_SLEEP_DECREASE_FACTOR - : SLEEP_DECREASE_FACTOR; + (consecutiveNoErrorCount * analysisPeriodMs + >= RAPID_SLEEP_DECREASE_TRANSITION_PERIOD_MS) + ? 
RAPID_SLEEP_DECREASE_FACTOR + : SLEEP_DECREASE_FACTOR; newSleepDuration = sleepDuration * reductionFactor; } else if (errorPercentage < MAX_EQUILIBRIUM_ERROR_PERCENTAGE) { @@ -176,15 +227,15 @@ private int analyzeMetricsAndUpdateSleepDuration(AbfsOperationMetrics metrics, double additionalDelayNeeded = 5 * analysisPeriodMs; if (bytesSuccessful > 0) { additionalDelayNeeded = (bytesSuccessful + bytesFailed) - * periodMs - / bytesSuccessful - - periodMs; + * periodMs + / bytesSuccessful + - periodMs; } // amortize the additional delay needed across the estimated number of // requests during the next period newSleepDuration = additionalDelayNeeded - / (operationsFailed + operationsSuccessful); + / (operationsFailed + operationsSuccessful); final double maxSleepDuration = analysisPeriodMs; final double minSleepDuration = sleepDuration * SLEEP_INCREASE_FACTOR; @@ -201,16 +252,16 @@ private int analyzeMetricsAndUpdateSleepDuration(AbfsOperationMetrics metrics, if (LOG.isDebugEnabled()) { LOG.debug(String.format( - "%5.5s, %10d, %10d, %10d, %10d, %6.2f, %5d, %5d, %5d", - name, - (int) bytesFailed, - (int) bytesSuccessful, - (int) operationsFailed, - (int) operationsSuccessful, - errorPercentage, - periodMs, - (int) sleepDuration, - (int) newSleepDuration)); + "%5.5s, %10d, %10d, %10d, %10d, %6.2f, %5d, %5d, %5d", + name, + (int) bytesFailed, + (int) bytesSuccessful, + (int) operationsFailed, + (int) operationsSuccessful, + errorPercentage, + periodMs, + (int) sleepDuration, + (int) newSleepDuration)); } return (int) newSleepDuration; @@ -238,12 +289,15 @@ public void run() { } long now = System.currentTimeMillis(); - if (now - blobMetrics.get().startTime >= analysisPeriodMs) { + if (timerOrchestrator(TimerFunctionality.SUSPEND, this)) { + return; + } + if (now - blobMetrics.get().getStartTime() >= analysisPeriodMs) { AbfsOperationMetrics oldMetrics = blobMetrics.getAndSet( - new AbfsOperationMetrics(now)); - oldMetrics.endTime = now; + new AbfsOperationMetrics(now)); + 
oldMetrics.setEndTime(now); sleepDuration = analyzeMetricsAndUpdateSleepDuration(oldMetrics, - sleepDuration); + sleepDuration); } } finally { if (doWork) { @@ -252,24 +306,4 @@ public void run() { } } } - - /** - * Stores Abfs operation metrics during each analysis period. - */ - static class AbfsOperationMetrics { - private AtomicLong bytesFailed; - private AtomicLong bytesSuccessful; - private AtomicLong operationsFailed; - private AtomicLong operationsSuccessful; - private long endTime; - private long startTime; - - AbfsOperationMetrics(long startTime) { - this.startTime = startTime; - this.bytesFailed = new AtomicLong(); - this.bytesSuccessful = new AtomicLong(); - this.operationsFailed = new AtomicLong(); - this.operationsSuccessful = new AtomicLong(); - } - } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientThrottlingIntercept.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientThrottlingIntercept.java index 7303e833418db7..3bb225d4be862d 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientThrottlingIntercept.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientThrottlingIntercept.java @@ -19,13 +19,17 @@ package org.apache.hadoop.fs.azurebfs.services; import java.net.HttpURLConnection; +import java.util.concurrent.locks.ReentrantLock; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; import org.apache.hadoop.fs.azurebfs.AbfsStatistic; import org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations; +import static java.net.HttpURLConnection.HTTP_UNAVAILABLE; + /** * Throttles Azure Blob File System read and write operations to achieve maximum * throughput by minimizing errors. 
The errors occur when the account ingress @@ -38,35 +42,101 @@ * and sleeps just enough to minimize errors, allowing optimal ingress and/or * egress throughput. */ -public final class AbfsClientThrottlingIntercept { +public final class AbfsClientThrottlingIntercept implements AbfsThrottlingIntercept { private static final Logger LOG = LoggerFactory.getLogger( AbfsClientThrottlingIntercept.class); private static final String RANGE_PREFIX = "bytes="; - private static AbfsClientThrottlingIntercept singleton = null; - private AbfsClientThrottlingAnalyzer readThrottler = null; - private AbfsClientThrottlingAnalyzer writeThrottler = null; - private static boolean isAutoThrottlingEnabled = false; + private static AbfsClientThrottlingIntercept singleton; // singleton, initialized in static initialization block + private static final ReentrantLock LOCK = new ReentrantLock(); + private final AbfsClientThrottlingAnalyzer readThrottler; + private final AbfsClientThrottlingAnalyzer writeThrottler; + private final String accountName; // Hide default constructor - private AbfsClientThrottlingIntercept() { - readThrottler = new AbfsClientThrottlingAnalyzer("read"); - writeThrottler = new AbfsClientThrottlingAnalyzer("write"); + public AbfsClientThrottlingIntercept(String accountName, AbfsConfiguration abfsConfiguration) { + this.accountName = accountName; + this.readThrottler = setAnalyzer("read " + accountName, abfsConfiguration); + this.writeThrottler = setAnalyzer("write " + accountName, abfsConfiguration); + LOG.debug("Client-side throttling is enabled for the ABFS file system for the account : {}", accountName); } - public static synchronized void initializeSingleton(boolean enableAutoThrottling) { - if (!enableAutoThrottling) { - return; - } + // Hide default constructor + private AbfsClientThrottlingIntercept(AbfsConfiguration abfsConfiguration) { + // Account name is kept as empty as same instance is shared across all accounts. 
+ this.accountName = ""; + this.readThrottler = setAnalyzer("read", abfsConfiguration); + this.writeThrottler = setAnalyzer("write", abfsConfiguration); + LOG.debug("Client-side throttling is enabled for the ABFS file system using singleton intercept"); + } + + /** + * Sets the analyzer for the intercept. + * @param name Name of the analyzer. + * @param abfsConfiguration The configuration. + * @return AbfsClientThrottlingAnalyzer instance. + */ + private AbfsClientThrottlingAnalyzer setAnalyzer(String name, AbfsConfiguration abfsConfiguration) { + return new AbfsClientThrottlingAnalyzer(name, abfsConfiguration); + } + + /** + * Returns the analyzer for read operations. + * @return AbfsClientThrottlingAnalyzer for read. + */ + AbfsClientThrottlingAnalyzer getReadThrottler() { + return readThrottler; + } + + /** + * Returns the analyzer for write operations. + * @return AbfsClientThrottlingAnalyzer for write. + */ + AbfsClientThrottlingAnalyzer getWriteThrottler() { + return writeThrottler; + } + + /** + * Creates a singleton object of the AbfsClientThrottlingIntercept. + * which is shared across all filesystem instances. + * @param abfsConfiguration configuration set. + * @return singleton object of intercept. + */ + static AbfsClientThrottlingIntercept initializeSingleton(AbfsConfiguration abfsConfiguration) { if (singleton == null) { - singleton = new AbfsClientThrottlingIntercept(); - isAutoThrottlingEnabled = true; - LOG.debug("Client-side throttling is enabled for the ABFS file system."); + LOCK.lock(); + try { + if (singleton == null) { + singleton = new AbfsClientThrottlingIntercept(abfsConfiguration); + LOG.debug("Client-side throttling is enabled for the ABFS file system."); + } + } finally { + LOCK.unlock(); + } } + return singleton; + } + + /** + * Updates the metrics for the case when response code signifies throttling + * but there are some expected bytes to be sent. 
+ * @param isThrottledOperation returns true if status code is HTTP_UNAVAILABLE + * @param abfsHttpOperation Used for status code and data transferred. + * @return true if the operation is throttled and has some bytes to transfer. + */ + private boolean updateBytesTransferred(boolean isThrottledOperation, + AbfsHttpOperation abfsHttpOperation) { + return isThrottledOperation && abfsHttpOperation.getExpectedBytesToBeSent() > 0; } - static void updateMetrics(AbfsRestOperationType operationType, - AbfsHttpOperation abfsHttpOperation) { - if (!isAutoThrottlingEnabled || abfsHttpOperation == null) { + /** + * Updates the metrics for successful and failed read and write operations. + * @param operationType Only applicable for read and write operations. + * @param abfsHttpOperation Used for status code and data transferred. + */ + @Override + public void updateMetrics(AbfsRestOperationType operationType, + AbfsHttpOperation abfsHttpOperation) { + if (abfsHttpOperation == null) { return; } @@ -78,11 +148,24 @@ static void updateMetrics(AbfsRestOperationType operationType, boolean isFailedOperation = (status < HttpURLConnection.HTTP_OK || status >= HttpURLConnection.HTTP_INTERNAL_ERROR); + // If status code is 503, it is considered as a throttled operation. + boolean isThrottledOperation = (status == HTTP_UNAVAILABLE); + switch (operationType) { case Append: contentLength = abfsHttpOperation.getBytesSent(); + if (contentLength == 0) { + /* + Signifies the case where we could not update the bytesSent due to + throttling but there were some expectedBytesToBeSent. 
+ */ + if (updateBytesTransferred(isThrottledOperation, abfsHttpOperation)) { + LOG.debug("Updating metrics due to throttling for path {}", abfsHttpOperation.getConnUrl().getPath()); + contentLength = abfsHttpOperation.getExpectedBytesToBeSent(); + } + } if (contentLength > 0) { - singleton.writeThrottler.addBytesTransferred(contentLength, + writeThrottler.addBytesTransferred(contentLength, isFailedOperation); } break; @@ -90,7 +173,7 @@ static void updateMetrics(AbfsRestOperationType operationType, String range = abfsHttpOperation.getConnection().getRequestProperty(HttpHeaderConfigurations.RANGE); contentLength = getContentLengthIfKnown(range); if (contentLength > 0) { - singleton.readThrottler.addBytesTransferred(contentLength, + readThrottler.addBytesTransferred(contentLength, isFailedOperation); } break; @@ -104,21 +187,18 @@ static void updateMetrics(AbfsRestOperationType operationType, * uses this to suspend the request, if necessary, to minimize errors and * maximize throughput. */ - static void sendingRequest(AbfsRestOperationType operationType, + @Override + public void sendingRequest(AbfsRestOperationType operationType, AbfsCounters abfsCounters) { - if (!isAutoThrottlingEnabled) { - return; - } - switch (operationType) { case ReadFile: - if (singleton.readThrottler.suspendIfNecessary() + if (readThrottler.suspendIfNecessary() && abfsCounters != null) { abfsCounters.incrementCounter(AbfsStatistic.READ_THROTTLES, 1); } break; case Append: - if (singleton.writeThrottler.suspendIfNecessary() + if (writeThrottler.suspendIfNecessary() && abfsCounters != null) { abfsCounters.incrementCounter(AbfsStatistic.WRITE_THROTTLES, 1); } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsLease.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsLease.java new file mode 100644 index 00000000000000..f72658fb789aa5 --- /dev/null +++ 
b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsLease.java @@ -0,0 +1,66 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants; +import org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; +import org.apache.hadoop.fs.azurebfs.utils.TracingContext; + +public class AbfsDfsLease extends AbfsLease { + + public AbfsDfsLease(final AbfsClient client, + final String path, + final int acquireMaxRetries, + final int acquireRetryInterval, + final Integer leaseDuration, + final TracingContext tracingContext) throws AzureBlobFileSystemException { + super(client, path, acquireMaxRetries, acquireRetryInterval, leaseDuration, + tracingContext); + } + + public AbfsDfsLease(final AbfsClient client, + final String path, + final Integer leaseDuration, + final TracingContext tracingContext) throws AzureBlobFileSystemException { + super(client, path, leaseDuration, tracingContext); + } + + @Override + String callRenewLeaseAPI(final String path, + final String leaseId, + final 
TracingContext tracingContext) throws AzureBlobFileSystemException { + AbfsRestOperation op = client.renewLease(path, leaseId, tracingContext); + return op.getResult().getResponseHeader(HttpHeaderConfigurations.X_MS_LEASE_ID); + } + + @Override + AbfsRestOperation callAcquireLeaseAPI(final String path, final Integer leaseDuration, + final TracingContext tracingContext) + throws AzureBlobFileSystemException { + return client.acquireLease(path, + leaseDuration, tracingContext); + } + + @Override + void callReleaseLeaseAPI(final String path, final String leaseID, final TracingContext tracingContext) + throws AzureBlobFileSystemException { + client.releaseLease(path, leaseID, tracingContext); + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java index 584f034837f0e6..7193c20137c182 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java @@ -27,6 +27,7 @@ import java.net.URL; import java.util.ArrayList; import java.util.List; +import java.util.Map; import javax.net.ssl.HttpsURLConnection; import javax.net.ssl.SSLSocketFactory; @@ -65,6 +66,9 @@ import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_COMP; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.COMP_LIST; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HUNDRED_CONTINUE; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.EXPECT; + /** * Represents an HTTP operation. 
*/ @@ -95,6 +99,7 @@ public class AbfsHttpOperation implements AbfsPerfLoggable { // metrics private int bytesSent; + private int expectedBytesToBeSent; private long bytesReceived; // optional trace enabled metrics @@ -199,6 +204,10 @@ public int getBytesSent() { return bytesSent; } + public int getExpectedBytesToBeSent() { + return expectedBytesToBeSent; + } + public long getBytesReceived() { return bytesReceived; } @@ -211,6 +220,10 @@ public String getResponseHeader(String httpHeader) { return connection.getHeaderField(httpHeader); } + public Map> getResponseHeaders() { + return connection.getHeaderFields(); + } + public List getBlockIdList() { return blockIdList; } @@ -334,7 +347,7 @@ public AbfsHttpOperation(final URL url, final String method, final List leaseID = new AtomicReference<>(); private volatile Throwable exception = null; private volatile int acquireRetryCount = 0; private volatile ListenableScheduledFuture future = null; + private final Integer leaseDuration; + + private Timer timer = null; public static class LeaseException extends AzureBlobFileSystemException { public LeaseException(Throwable t) { @@ -81,20 +87,35 @@ public LeaseException(String s) { } } - public AbfsLease(AbfsClient client, String path, TracingContext tracingContext) throws AzureBlobFileSystemException { + /** + * @param client client object for making server calls + * @param path path on which lease has to be acquired, renewed and freed in future + * @param leaseDuration duration for which lease to be taken in seconds + * @param tracingContext for tracing server calls + * + * @throws AzureBlobFileSystemException exception while calling acquireLease API + */ + public AbfsLease(AbfsClient client, String path, + final Integer leaseDuration, + TracingContext tracingContext) throws AzureBlobFileSystemException { this(client, path, DEFAULT_LEASE_ACQUIRE_MAX_RETRIES, - DEFAULT_LEASE_ACQUIRE_RETRY_INTERVAL, tracingContext); + DEFAULT_LEASE_ACQUIRE_RETRY_INTERVAL, leaseDuration, 
tracingContext); } @VisibleForTesting public AbfsLease(AbfsClient client, String path, int acquireMaxRetries, - int acquireRetryInterval, TracingContext tracingContext) throws AzureBlobFileSystemException { + int acquireRetryInterval, final Integer leaseDuration, TracingContext tracingContext) throws AzureBlobFileSystemException { this.leaseFreed = false; this.client = client; this.path = path; this.tracingContext = tracingContext; + this.leaseDuration = leaseDuration; - if (client.getNumLeaseThreads() < 1) { + /* + * If the number of threads to use for lease operations for infinite lease directories + * and the object is created for infinite-lease (leaseDuration == null). + */ + if (client.getNumLeaseThreads() < 1 && leaseDuration == null) { throw new LeaseException(ERR_NO_LEASE_THREADS); } @@ -104,7 +125,7 @@ public AbfsLease(AbfsClient client, String path, int acquireMaxRetries, acquireLease(retryPolicy, 0, acquireRetryInterval, 0, new TracingContext(tracingContext)); - while (leaseID == null && exception == null) { + while (leaseID.get() == null && exception == null) { try { future.get(); } catch (Exception e) { @@ -122,18 +143,23 @@ public AbfsLease(AbfsClient client, String path, int acquireMaxRetries, private void acquireLease(RetryPolicy retryPolicy, int numRetries, int retryInterval, long delay, TracingContext tracingContext) - throws LeaseException { + throws AzureBlobFileSystemException { LOG.debug("Attempting to acquire lease on {}, retry {}", path, numRetries); if (future != null && !future.isDone()) { throw new LeaseException(ERR_LEASE_FUTURE_EXISTS); } - future = client.schedule(() -> client.acquireLease(path, - INFINITE_LEASE_DURATION, tracingContext), + if(leaseDuration != null) { + leaseID.set(callAcquireLeaseAPI(path, leaseDuration, tracingContext).getResult().getResponseHeader(HttpHeaderConfigurations.X_MS_LEASE_ID)); + spawnLeaseRenewTimer(path, leaseDuration * 1000); + return; + } + future = client.schedule(() -> callAcquireLeaseAPI(path, + 
INFINITE_LEASE_DURATION, tracingContext), delay, TimeUnit.SECONDS); client.addCallback(future, new FutureCallback() { @Override public void onSuccess(@Nullable AbfsRestOperation op) { - leaseID = op.getResult().getResponseHeader(HttpHeaderConfigurations.X_MS_LEASE_ID); + leaseID.set(op.getResult().getResponseHeader(HttpHeaderConfigurations.X_MS_LEASE_ID)); LOG.debug("Acquired lease {} on {}", leaseID, path); } @@ -156,6 +182,29 @@ public void onFailure(Throwable throwable) { }); } + private void spawnLeaseRenewTimer(String path, Integer leaseDuration) { + timer = new Timer(); + timer.schedule(new TimerTask() { + @Override + public void run() { + try { + leaseID.set(callRenewLeaseAPI(path, leaseID.get(), tracingContext)); + } catch (AzureBlobFileSystemException e) { + throw new RuntimeException(e); + } + } + }, leaseDuration / 2, leaseDuration / 2); + } + + abstract String callRenewLeaseAPI(final String path, + final String s, + final TracingContext tracingContext) throws AzureBlobFileSystemException; + + abstract AbfsRestOperation callAcquireLeaseAPI(final String path, + final Integer leaseDuration, + final TracingContext tracingContext) + throws AzureBlobFileSystemException; + /** * Cancel future and free the lease. If an exception occurs while releasing the lease, the error * will be logged. If the lease cannot be released, AzureBlobFileSystem breakLease will need to @@ -170,9 +219,12 @@ public void free() { if (future != null && !future.isDone()) { future.cancel(true); } + if (timer != null) { + timer.cancel(); + } TracingContext tracingContext = new TracingContext(this.tracingContext); tracingContext.setOperation(FSOperationType.RELEASE_LEASE); - client.releaseLease(path, leaseID, tracingContext); + callReleaseLeaseAPI(path, leaseID.get(), tracingContext); } catch (IOException e) { LOG.warn("Exception when trying to release lease {} on {}. 
Lease will need to be broken: {}", leaseID, path, e.getMessage()); @@ -184,12 +236,15 @@ public void free() { } } + abstract void callReleaseLeaseAPI(final String path, final String leaseID, final TracingContext tracingContext) + throws AzureBlobFileSystemException; + public boolean isFreed() { return leaseFreed; } public String getLeaseID() { - return leaseID; + return leaseID.get(); } @VisibleForTesting diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsNoOpThrottlingIntercept.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsNoOpThrottlingIntercept.java new file mode 100644 index 00000000000000..b88f4a05d337b4 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsNoOpThrottlingIntercept.java @@ -0,0 +1,37 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.azurebfs.services; + +final class AbfsNoOpThrottlingIntercept implements AbfsThrottlingIntercept { + + public static final AbfsNoOpThrottlingIntercept INSTANCE = new AbfsNoOpThrottlingIntercept(); + + private AbfsNoOpThrottlingIntercept() { + } + + @Override + public void updateMetrics(final AbfsRestOperationType operationType, + final AbfsHttpOperation abfsHttpOperation) { + } + + @Override + public void sendingRequest(final AbfsRestOperationType operationType, + final AbfsCounters abfsCounters) { + } +} \ No newline at end of file diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOperationMetrics.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOperationMetrics.java new file mode 100644 index 00000000000000..d19c69354a2d33 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOperationMetrics.java @@ -0,0 +1,139 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.util.concurrent.atomic.AtomicLong; + +/** + * Stores Abfs operation metrics during each analysis period. 
+ */ +class AbfsOperationMetrics { + + /** + * No of bytes which could not be transferred due to a failed operation. + */ + private final AtomicLong bytesFailed; + + /** + * No of bytes successfully transferred during a successful operation. + */ + private final AtomicLong bytesSuccessful; + + /** + * Total no of failed operations. + */ + private final AtomicLong operationsFailed; + + /** + * Total no of successful operations. + */ + private final AtomicLong operationsSuccessful; + + /** + * Time when collection of metrics ended. + */ + private long endTime; + + /** + * Time when the collection of metrics started. + */ + private final long startTime; + + AbfsOperationMetrics(long startTime) { + this.startTime = startTime; + this.bytesFailed = new AtomicLong(); + this.bytesSuccessful = new AtomicLong(); + this.operationsFailed = new AtomicLong(); + this.operationsSuccessful = new AtomicLong(); + } + + /** + * + * @return bytes failed to transfer. + */ + AtomicLong getBytesFailed() { + return bytesFailed; + } + + /** + * + * @return bytes successfully transferred. + */ + AtomicLong getBytesSuccessful() { + return bytesSuccessful; + } + + /** + * + * @return no of operations failed. + */ + AtomicLong getOperationsFailed() { + return operationsFailed; + } + + /** + * + * @return no of successful operations. + */ + AtomicLong getOperationsSuccessful() { + return operationsSuccessful; + } + + /** + * + * @return end time of metric collection. + */ + long getEndTime() { + return endTime; + } + + /** + * + * @param endTime sets the end time. + */ + void setEndTime(final long endTime) { + this.endTime = endTime; + } + + /** + * + * @return start time of metric collection. 
+ */ + long getStartTime() { + return startTime; + } + + void addBytesFailed(long bytes) { + this.getBytesFailed().addAndGet(bytes); + } + + void addBytesSuccessful(long bytes) { + this.getBytesSuccessful().addAndGet(bytes); + } + + void incrementOperationsFailed() { + this.getOperationsFailed().incrementAndGet(); + } + + void incrementOperationsSuccessful() { + this.getOperationsSuccessful().incrementAndGet(); + } + +} + diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStream.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStream.java index 8ef40df29e2cb1..3a986d2cd9a9ce 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStream.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStream.java @@ -95,6 +95,7 @@ public class AbfsOutputStream extends OutputStream implements Syncable, private boolean disableOutputStreamFlush; private boolean enableSmallWriteOptimization; private boolean isAppendBlob; + private boolean isExpectHeaderEnabled; private volatile IOException lastError; private long lastFlushOffset; @@ -169,6 +170,7 @@ public AbfsOutputStream(AbfsOutputStreamContext abfsOutputStreamContext) this.position = abfsOutputStreamContext.getPosition(); this.closed = false; this.supportFlush = abfsOutputStreamContext.isEnableFlush(); + this.isExpectHeaderEnabled = abfsOutputStreamContext.isExpectHeaderEnabled(); this.disableOutputStreamFlush = abfsOutputStreamContext .isDisableOutputStreamFlush(); this.enableSmallWriteOptimization @@ -456,7 +458,7 @@ private void uploadBlockAsync(AbfsBlock blockToUpload, * leaseId - The AbfsLeaseId for this request. 
*/ AppendRequestParameters reqParams = new AppendRequestParameters( - offset, 0, bytesLength, mode, false, leaseId); + offset, 0, bytesLength, mode, false, leaseId, isExpectHeaderEnabled); AbfsRestOperation op; if (!OperativeEndpoint.isIngressEnabledOnDFS(prefixMode, client.getAbfsConfiguration())) { try { @@ -737,7 +739,7 @@ private void writeAppendBlobCurrentBufferToService() throws IOException { try (AbfsPerfInfo perfInfo = new AbfsPerfInfo(tracker, "writeCurrentBufferToService", "append")) { AppendRequestParameters reqParams = new AppendRequestParameters(offset, 0, - bytesLength, APPEND_MODE, true, leaseId); + bytesLength, APPEND_MODE, true, leaseId, isExpectHeaderEnabled); AbfsRestOperation op = client.append(path, uploadData.toByteArray(), reqParams, cachedSasToken.get(), new TracingContext(tracingContext)); cachedSasToken.update(op.getSasToken()); diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStreamContext.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStreamContext.java index 67f0b2a4849b77..94a62abbe99812 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStreamContext.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStreamContext.java @@ -33,6 +33,8 @@ public class AbfsOutputStreamContext extends AbfsStreamContext { private boolean enableFlush; + private boolean enableExpectHeader; + private boolean enableSmallWriteOptimization; private boolean disableOutputStreamFlush; @@ -80,6 +82,11 @@ public AbfsOutputStreamContext enableFlush(final boolean enableFlush) { return this; } + public AbfsOutputStreamContext enableExpectHeader(final boolean enableExpectHeader) { + this.enableExpectHeader = enableExpectHeader; + return this; + } + public AbfsOutputStreamContext enableSmallWriteOptimization(final boolean enableSmallWriteOptimization) { 
this.enableSmallWriteOptimization = enableSmallWriteOptimization; return this; @@ -192,6 +199,10 @@ public boolean isEnableFlush() { return enableFlush; } + public boolean isExpectHeaderEnabled() { + return enableExpectHeader; + } + public boolean isDisableOutputStreamFlush() { return disableOutputStreamFlush; } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java index 7c957f37cefc07..3585faf7585614 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java @@ -29,7 +29,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.fs.azurebfs.AbfsStatistic; import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; @@ -40,6 +39,7 @@ import org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.PUT_BLOCK_LIST; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_CONTINUE; /** * The AbfsRestOperation for Rest AbfsClient. @@ -49,6 +49,8 @@ public class AbfsRestOperation { private final AbfsRestOperationType operationType; // Blob FS client, which has the credentials, retry policy, and logs. 
private final AbfsClient client; + // Return intercept instance + private final AbfsThrottlingIntercept intercept; // the HTTP method (PUT, PATCH, POST, GET, HEAD, or DELETE) private final String method; // full URL including query parameters @@ -149,6 +151,7 @@ String getSasToken() { || AbfsHttpConstants.HTTP_METHOD_PATCH.equals(method)); this.sasToken = sasToken; this.abfsCounters = client.getAbfsCounters(); + this.intercept = client.getIntercept(); } /** @@ -230,11 +233,21 @@ private void completeExecute(TracingContext tracingContext) } } - if (result.getStatusCode() >= HttpURLConnection.HTTP_BAD_REQUEST) { + int status = result.getStatusCode(); + /* + If even after exhausting all retries, the http status code has an + invalid value it qualifies for InvalidAbfsRestOperationException. + All http status code less than 1xx range are considered as invalid + status codes. + */ + if (status < HTTP_CONTINUE) { + throw new InvalidAbfsRestOperationException(null, retryCount); + } + + if (status >= HttpURLConnection.HTTP_BAD_REQUEST) { throw new AbfsRestOperationException(result.getStatusCode(), result.getStorageErrorCode(), result.getStorageErrorMessage(), null, result); } - LOG.trace("{} REST operation complete", operationType); } @@ -278,10 +291,10 @@ public void signRequest(final AbfsHttpOperation httpOperation, */ private boolean executeHttpOperation(final int retryCount, TracingContext tracingContext) throws AzureBlobFileSystemException { - AbfsHttpOperation httpOperation = null; + AbfsHttpOperation httpOperation; try { // initialize the HTTP request and open the connection - httpOperation = createNewHttpOperation(); + httpOperation = createHttpOperation(); incrementCounter(AbfsStatistic.CONNECTIONS_MADE, 1); tracingContext.constructHeader(httpOperation); @@ -296,8 +309,7 @@ private boolean executeHttpOperation(final int retryCount, // dump the headers AbfsIoUtils.dumpHeadersToDebugLog("Request Headers", httpOperation.getConnection().getRequestProperties()); - 
AbfsClientThrottlingIntercept.sendingRequest(operationType, abfsCounters); - + intercept.sendingRequest(operationType, abfsCounters); if (hasRequestBody) { // HttpUrlConnection requires httpOperation.sendRequest(buffer, bufferOffset, bufferLength); @@ -323,7 +335,7 @@ private boolean executeHttpOperation(final int retryCount, LOG.warn("Unknown host name: %s. Retrying to resolve the host name...", hostname); if (!client.getRetryPolicy().shouldRetry(retryCount, -1)) { - throw new InvalidAbfsRestOperationException(ex); + throw new InvalidAbfsRestOperationException(ex, retryCount); } return false; } catch (IOException ex) { @@ -332,12 +344,25 @@ private boolean executeHttpOperation(final int retryCount, } if (!client.getRetryPolicy().shouldRetry(retryCount, -1)) { - throw new InvalidAbfsRestOperationException(ex); + throw new InvalidAbfsRestOperationException(ex, retryCount); } return false; } finally { - AbfsClientThrottlingIntercept.updateMetrics(operationType, httpOperation); + int status = httpOperation.getStatusCode(); + /* + A status less than 300 (2xx range) or greater than or equal + to 500 (5xx range) should contribute to throttling metrics being updated. + Less than 200 or greater than or equal to 500 show failed operations. 2xx + range contributes to successful operations. 3xx range is for redirects + and 4xx range is for user errors. These should not be a part of + throttling backoff computation. 
+ */ + boolean updateMetricsResponseCode = (status < HttpURLConnection.HTTP_MULT_CHOICE + || status >= HttpURLConnection.HTTP_INTERNAL_ERROR); + if (updateMetricsResponseCode) { + intercept.updateMetrics(operationType, httpOperation); + } } LOG.debug("HttpRequest: {}: {}", operationType, httpOperation.toString()); @@ -351,11 +376,6 @@ private boolean executeHttpOperation(final int retryCount, return true; } - @VisibleForTesting - AbfsHttpOperation createNewHttpOperation() throws IOException { - return new AbfsHttpOperation(url, method, requestHeaders); - } - @VisibleForTesting String getMethod() { return method; @@ -366,6 +386,15 @@ void setResult(AbfsHttpOperation result) { this.result = result; } + /** + * Creates new object of {@link AbfsHttpOperation} with the url, method, and + * requestHeaders fields of the AbfsRestOperation object. + */ + @VisibleForTesting + AbfsHttpOperation createHttpOperation() throws IOException { + return new AbfsHttpOperation(url, method, requestHeaders); + } + /** * Incrementing Abfs counters with a long value. 
* diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperationType.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperationType.java index 349205e6ba0663..83814d883a6f14 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperationType.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperationType.java @@ -36,6 +36,7 @@ public enum AbfsRestOperationType { SetOwner, SetPathProperties, SetPermissions, + SetBlobMetadata, Append, Flush, ReadFile, @@ -46,10 +47,12 @@ public enum AbfsRestOperationType { PutBlob, GetBlobProperties, GetContainerProperties, + GetBlobMetadata, PutBlock, PutBlockList, GetBlockList, DeleteBlob, GetListBlobProperties, - CopyBlob + CopyBlob, + GetBlob } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsThrottlingIntercept.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsThrottlingIntercept.java new file mode 100644 index 00000000000000..0ceb4335fcef44 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsThrottlingIntercept.java @@ -0,0 +1,49 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * An interface for Abfs throttling intercepts. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +public interface AbfsThrottlingIntercept { + + /** + * Updates the metrics for successful and failed read and write operations. + * @param operationType Only applicable for read and write operations. + * @param abfsHttpOperation Used for status code and data transferred. + */ + void updateMetrics(AbfsRestOperationType operationType, + AbfsHttpOperation abfsHttpOperation); + + /** + * Called before the request is sent. Client-side throttling + * uses this to suspend the request, if necessary, to minimize errors and + * maximize throughput. + * @param operationType Only applicable for read and write operations. + * @param abfsCounters Used for counters. 
+ */ + void sendingRequest(AbfsRestOperationType operationType, + AbfsCounters abfsCounters); + +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsThrottlingInterceptFactory.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsThrottlingInterceptFactory.java new file mode 100644 index 00000000000000..0eabe18872d4f9 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsThrottlingInterceptFactory.java @@ -0,0 +1,102 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; + +import org.apache.hadoop.fs.azurebfs.utils.WeakReferenceMap; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Class to get an instance of throttling intercept class per account. + */ +final class AbfsThrottlingInterceptFactory { + + private AbfsThrottlingInterceptFactory() { + } + + private static AbfsConfiguration abfsConfig; + + /** + * List of references notified of loss. 
+ */ + private static List lostReferences = new ArrayList<>(); + + private static final Logger LOG = LoggerFactory.getLogger( + AbfsThrottlingInterceptFactory.class); + + /** + * Map which stores instance of ThrottlingIntercept class per account. + */ + private static WeakReferenceMap + interceptMap = new WeakReferenceMap<>( + AbfsThrottlingInterceptFactory::factory, + AbfsThrottlingInterceptFactory::referenceLost); + + /** + * Returns instance of throttling intercept. + * @param accountName Account name. + * @return instance of throttling intercept. + */ + private static AbfsClientThrottlingIntercept factory(final String accountName) { + return new AbfsClientThrottlingIntercept(accountName, abfsConfig); + } + + /** + * Reference lost callback. + * @param accountName key lost. + */ + private static void referenceLost(String accountName) { + lostReferences.add(accountName); + } + + /** + * Returns an instance of AbfsThrottlingIntercept. + * + * @param accountName The account for which we need instance of throttling intercept. + * @param abfsConfiguration The object of AbfsConfiguration class. + * @return Instance of AbfsThrottlingIntercept. 
+ */ + static synchronized AbfsThrottlingIntercept getInstance(String accountName, + AbfsConfiguration abfsConfiguration) { + abfsConfig = abfsConfiguration; + AbfsThrottlingIntercept intercept; + if (!abfsConfiguration.isAutoThrottlingEnabled()) { + return AbfsNoOpThrottlingIntercept.INSTANCE; + } + // If singleton is enabled use a static instance of the intercept class for all accounts + if (!abfsConfiguration.accountThrottlingEnabled()) { + intercept = AbfsClientThrottlingIntercept.initializeSingleton( + abfsConfiguration); + } else { + // Return the instance from the map + intercept = interceptMap.get(accountName); + if (intercept == null) { + intercept = new AbfsClientThrottlingIntercept(accountName, + abfsConfiguration); + interceptMap.put(accountName, intercept); + } + } + return intercept; + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ExponentialRetryPolicy.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ExponentialRetryPolicy.java index 218eecaa45a405..dee1d374d4a046 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ExponentialRetryPolicy.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ExponentialRetryPolicy.java @@ -24,6 +24,8 @@ import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_CONTINUE; + /** * Retry policy used by AbfsClient. 
* */ @@ -138,7 +140,7 @@ public ExponentialRetryPolicy(final int retryCount, final int minBackoff, final */ public boolean shouldRetry(final int retryCount, final int statusCode) { return retryCount < this.retryCount - && (statusCode == -1 + && (statusCode < HTTP_CONTINUE || statusCode == HttpURLConnection.HTTP_CLIENT_TIMEOUT || statusCode == HttpURLConnection.HTTP_GONE || statusCode == HTTP_TOO_MANY_REQUESTS diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListBlobConsumer.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListBlobConsumer.java index cbbc6e30e3e8d7..80017ab558a78e 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListBlobConsumer.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListBlobConsumer.java @@ -18,6 +18,8 @@ package org.apache.hadoop.fs.azurebfs.services; +import java.util.List; + import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; public class ListBlobConsumer { @@ -28,7 +30,7 @@ public ListBlobConsumer(final ListBlobQueue listBlobQueue) { this.listBlobQueue = listBlobQueue; } - public BlobList consume() throws AzureBlobFileSystemException { + public List consume() throws AzureBlobFileSystemException { if (listBlobQueue.getException() != null) { throw listBlobQueue.getException(); } @@ -37,6 +39,6 @@ public BlobList consume() throws AzureBlobFileSystemException { public Boolean isCompleted() { return listBlobQueue.getIsCompleted() - && listBlobQueue.getConsumerLag() == 0; + && listBlobQueue.size() == 0; } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListBlobProducer.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListBlobProducer.java index c327e8b1e4407b..0d7852c3e0d391 100644 --- 
a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListBlobProducer.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListBlobProducer.java @@ -80,20 +80,20 @@ public ListBlobProducer(final String src, this.nextMarker = initNextMarker; new Thread(() -> { do { - if (listBlobQueue.getConsumerLag() >= client.getAbfsConfiguration() - .getMaximumConsumerLag()) { + int maxResult = listBlobQueue.availableSize(); + if (maxResult == 0) { continue; } AbfsRestOperation op = null; try { - op = client.getListBlobs(nextMarker, src, null, tracingContext); + op = client.getListBlobs(nextMarker, src, maxResult, tracingContext); } catch (AzureBlobFileSystemException ex) { listBlobQueue.setFailed(ex); return; } BlobList blobList = op.getResult().getBlobList(); nextMarker = blobList.getNextMarker(); - listBlobQueue.enqueue(blobList); + listBlobQueue.enqueue(blobList.getBlobPropertyList()); if (nextMarker == null) { listBlobQueue.complete(); } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListBlobQueue.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListBlobQueue.java index 7e8f6d275d44f7..b2a4f2f537407e 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListBlobQueue.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ListBlobQueue.java @@ -19,13 +19,15 @@ package org.apache.hadoop.fs.azurebfs.services; import java.util.ArrayDeque; +import java.util.ArrayList; +import java.util.List; import java.util.Queue; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; public class ListBlobQueue { - private final Queue blobLists = new ArrayDeque<>(); + private final Queue blobLists; private int totalProduced = 0; @@ -42,11 +44,17 @@ public class ListBlobQueue { */ private ListBlobProducer producer; - public ListBlobQueue() { + 
private final int maxSize; + private final int maxConsumedBlobCount; + public ListBlobQueue(int maxSize, int maxConsumedBlobCount) { + blobLists = new ArrayDeque<>(maxSize); + this.maxSize = maxSize; + this.maxConsumedBlobCount = maxConsumedBlobCount; } - public ListBlobQueue(BlobList initBlobList) { + public ListBlobQueue(List initBlobList, int maxSize, int maxConsumedBlobCount) { + this(maxSize, maxConsumedBlobCount); if (initBlobList != null) { enqueue(initBlobList); } @@ -74,20 +82,25 @@ AzureBlobFileSystemException getException() { return failureFromProducer; } - public synchronized void enqueue(BlobList blobList) { - blobLists.add(blobList); - totalProduced += blobList.getBlobPropertyList().size(); + public void enqueue(List blobProperties) { + blobLists.addAll(blobProperties); } - public synchronized BlobList dequeue() { - BlobList blobList = blobLists.poll(); - if (blobList != null) { - totalConsumed += blobList.getBlobPropertyList().size(); + public List dequeue() { + List blobProperties = new ArrayList<>(); + int counter = 0; + while(counter < maxConsumedBlobCount && blobLists.size() > 0) { + blobProperties.add(blobLists.poll()); + counter++; } - return blobList; + return blobProperties; } - public synchronized int getConsumerLag() { - return totalProduced - totalConsumed; + public int size() { + return blobLists.size(); + } + + public int availableSize() { + return maxSize - blobLists.size(); } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/OperativeEndpoint.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/OperativeEndpoint.java index c777ad83e6f669..83fa2124141e07 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/OperativeEndpoint.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/OperativeEndpoint.java @@ -23,19 +23,26 @@ * This class is mainly to unify the fallback for all API's to DFS 
endpoint at a single spot. */ public class OperativeEndpoint { - public static boolean isMkdirEnabledOnDFS(PrefixMode mode, AbfsConfiguration abfsConfiguration) { + public static boolean isMkdirEnabledOnDFS(PrefixMode mode, AbfsConfiguration abfsConfiguration) { + if (mode == PrefixMode.BLOB) { + return abfsConfiguration.shouldMkdirFallbackToDfs(); + } else { + return true; + } + } + + public static boolean isIngressEnabledOnDFS(PrefixMode mode, AbfsConfiguration abfsConfiguration) { if (mode == PrefixMode.BLOB) { - return abfsConfiguration.shouldMkdirFallbackToDfs(); + return abfsConfiguration.shouldIngressFallbackToDfs(); } else { return true; } } - public static boolean isIngressEnabledOnDFS(PrefixMode mode, AbfsConfiguration abfsConfiguration) { + public static boolean isReadEnabledOnDFS(PrefixMode mode, AbfsConfiguration abfsConfiguration) { if (mode == PrefixMode.BLOB) { - return abfsConfiguration.shouldIngressFallbackToDfs(); - } else { - return true; + return abfsConfiguration.shouldReadFallbackToDfs(); } + return true; } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/TimerFunctionality.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/TimerFunctionality.java new file mode 100644 index 00000000000000..52428fdd54a19f --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/TimerFunctionality.java @@ -0,0 +1,25 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +public enum TimerFunctionality { + RESUME, + + SUSPEND +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingContext.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingContext.java index 613274b6d0374f..241232ed917dcb 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingContext.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingContext.java @@ -140,6 +140,10 @@ public void setOperation(FSOperationType operation) { this.opType = operation; } + public int getRetryCount() { + return retryCount; + } + public void setRetryCount(int retryCount) { this.retryCount = retryCount; } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/UriUtils.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/UriUtils.java index e27d54b443ca20..857b20b2fd80bd 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/UriUtils.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/UriUtils.java @@ -169,6 +169,20 @@ public static String getMaskedUrl(URL url) { return url.toString().replace(queryString, maskedQueryString); } + public static String encodeMetadataAttribute(String value) throws UnsupportedEncodingException { + // We have to URL encode the attribute as it could + // have URI special characters which unless encoded 
will result + // in 403 errors from the server. This is due to metadata properties + // being sent in the HTTP header of the request which is in turn used + // on the server side to authorize the request. + return value == null ? null : URLEncoder.encode(value, StandardCharsets.UTF_8.name()); + } + + public static String decodeMetadataAttribute(String encoded) throws UnsupportedEncodingException { + return encoded == null ? null : + java.net.URLDecoder.decode(encoded, StandardCharsets.UTF_8.name()); + } + private UriUtils() { } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/WeakReferenceMap.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/WeakReferenceMap.java new file mode 100644 index 00000000000000..cd47809689c416 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/WeakReferenceMap.java @@ -0,0 +1,333 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.azurebfs.utils; + +import java.lang.ref.WeakReference; +import java.util.Iterator; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicLong; +import java.util.function.Consumer; +import java.util.function.Function; + +import javax.annotation.Nullable; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.fs.store.LogExactlyOnce; + +import static java.util.Objects.requireNonNull; + +/** + * A map of keys type K to objects of type V which uses weak references, + * so does not leak memory through long-lived references + * at the expense of losing references when GC takes place. + * + * This class is intended to be used instead of ThreadLocal storage when + * references are to be cleaned up when the instance holding. + * In this use case, the key is the Long key. + * + * Concurrency. + * The class assumes that map entries are rarely contended for when writing, + * and that not blocking other threads is more important than atomicity. + * - a ConcurrentHashMap is used to map keys to weak references, with + * all its guarantees. + * - there is no automatic pruning. + * - see {@link #create(Object)} for the concurrency semantics on entry creation. + */ +@InterfaceAudience.Private +public class WeakReferenceMap { + + private static final Logger LOG = + LoggerFactory.getLogger(WeakReferenceMap.class); + + /** + * The reference map. + */ + private final Map> map = new ConcurrentHashMap<>(); + + /** + * Supplier of new instances. + */ + private final Function factory; + + /** + * Nullable callback when a get on a key got a weak reference back. + * The assumption is that this is for logging/stats, which is why + * no attempt is made to use the call as a supplier of a new value. + */ + private final Consumer referenceLost; + + /** + * Counter of references lost. 
+ */ + private final AtomicLong referenceLostCount = new AtomicLong(); + + /** + * Counter of entries created. + */ + private final AtomicLong entriesCreatedCount = new AtomicLong(); + + /** + * Log to report loss of a reference during the create phase, which + * is believed to be a cause of HADOOP-18456. + */ + private final LogExactlyOnce referenceLostDuringCreation = new LogExactlyOnce(LOG); + + /** + * instantiate. + * @param factory supplier of new instances + * @param referenceLost optional callback on lost references. + */ + public WeakReferenceMap( + Function factory, + @Nullable final Consumer referenceLost) { + + this.factory = requireNonNull(factory); + this.referenceLost = referenceLost; + } + + @Override + public String toString() { + return "WeakReferenceMap{" + + "size=" + size() + + ", referenceLostCount=" + referenceLostCount + + ", entriesCreatedCount=" + entriesCreatedCount + + '}'; + } + + /** + * Map size. + * @return the current map size. + */ + public int size() { + return map.size(); + } + + /** + * Clear all entries. + */ + public void clear() { + map.clear(); + } + + /** + * look up the value, returning the possibly empty weak reference + * to a value, or null if no value was found. + * @param key key to look up + * @return null if there is no entry, a weak reference if found + */ + public WeakReference lookup(K key) { + return map.get(key); + } + + /** + * Get the value, creating if needed. + * @param key key. + * @return an instance. + */ + public V get(K key) { + final WeakReference currentWeakRef = lookup(key); + // resolve it, after which if not null, we have a strong reference + V strongVal = resolve(currentWeakRef); + if (strongVal != null) { + // all good. + return strongVal; + } + + // here, either currentWeakRef was null, or its reference was GC'd. + if (currentWeakRef != null) { + // garbage collection removed the reference. 
+ + // explicitly remove the weak ref from the map if it has not + // been updated by this point + // this is here just for completeness. + map.remove(key, currentWeakRef); + + // log/report the loss. + noteLost(key); + } + + // create a new value and add it to the map + return create(key); + } + + /** + * Create a new instance under a key. + *

+ * The instance is created, added to the map and then the + * map value retrieved. + * This ensures that the reference returned is that in the map, + * even if there is more than one entry being created at the same time. + * If that race does occur, it will be logged the first time it happens + * for this specific map instance. + *

+ * HADOOP-18456 highlighted the risk of a concurrent GC resulting in a null + * value being retrieved and so returned. + * To prevent this: + *

<ol>
+ * <li>A strong reference is retained to the newly created instance + * in a local variable.</li>
+ * <li>That variable is used after the resolution process, to ensure + * the JVM doesn't consider it "unreachable" and so eligible for GC.</li>
+ * <li>A check is made for the resolved reference being null, and if so, + * the put() is repeated</li>
+ * </ol>
+ * @param key key + * @return the created value + */ + public V create(K key) { + entriesCreatedCount.incrementAndGet(); + /* + Get a strong ref so even if a GC happens in this method the reference is not lost. + It is NOT enough to have a reference in a field, it MUST be used + so as to ensure the reference isn't optimized away prematurely. + "A reachable object is any object that can be accessed in any potential continuing + computation from any live thread." + */ + + final V strongRef = requireNonNull(factory.apply(key), + "factory returned a null instance"); + V resolvedStrongRef; + do { + WeakReference newWeakRef = new WeakReference<>(strongRef); + + // put it in the map + map.put(key, newWeakRef); + + // get it back from the map + WeakReference retrievedWeakRef = map.get(key); + // resolve that reference, handling the situation where somehow it was removed from the map + // between the put() and the get() + resolvedStrongRef = resolve(retrievedWeakRef); + if (resolvedStrongRef == null) { + referenceLostDuringCreation.warn("reference to %s lost during creation", key); + noteLost(key); + } + } while (resolvedStrongRef == null); + + // note if there was any change in the reference. + // as this forces strongRef to be kept in scope + if (strongRef != resolvedStrongRef) { + LOG.debug("Created instance for key {}: {} overwritten by {}", + key, strongRef, resolvedStrongRef); + } + + return resolvedStrongRef; + } + + /** + * Put a value under the key. + * A null value can be put, though on a get() call + * a new entry is generated + * + * @param key key + * @param value value + * @return any old non-null reference. + */ + public V put(K key, V value) { + return resolve(map.put(key, new WeakReference<>(value))); + } + + /** + * Remove any value under the key. + * @param key key + * @return any old non-null reference. + */ + public V remove(K key) { + return resolve(map.remove(key)); + } + + /** + * Does the map have a valid reference for this object? 
+ * no-side effects: there's no attempt to notify or cleanup + * if the reference is null. + * @param key key to look up + * @return true if there is a valid reference. + */ + public boolean containsKey(K key) { + final WeakReference current = lookup(key); + return resolve(current) != null; + } + + /** + * Given a possibly null weak reference, resolve + * its value. + * @param r reference to resolve + * @return the value or null + */ + protected V resolve(WeakReference r) { + return r == null ? null : r.get(); + } + + /** + * Prune all null weak references, calling the referenceLost + * callback for each one. + * + * non-atomic and non-blocking. + * @return the number of entries pruned. + */ + public int prune() { + int count = 0; + final Iterator>> it = map.entrySet().iterator(); + while (it.hasNext()) { + final Map.Entry> next = it.next(); + if (next.getValue().get() == null) { + it.remove(); + count++; + noteLost(next.getKey()); + } + } + return count; + } + + /** + * Notify the reference lost callback. + * @param key key of lost reference + */ + private void noteLost(final K key) { + // increment local counter + referenceLostCount.incrementAndGet(); + + // and call any notification function supplied in the constructor + if (referenceLost != null) { + referenceLost.accept(key); + } + } + + /** + * Get count of references lost as detected + * during prune() or get() calls. + * @return count of references lost + */ + public final long getReferenceLostCount() { + return referenceLostCount.get(); + } + + /** + * Get count of entries created on demand. 
+ * @return count of entries created + */ + public final long getEntriesCreatedCount() { + return entriesCreatedCount.get(); + } +} + diff --git a/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md b/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md index dfb7f3f42a5cf9..ac77765f9e0b52 100644 --- a/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md +++ b/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md @@ -769,6 +769,26 @@ Hflush() being the only documented API that can provide persistent data transfer, Flush() also attempting to persist buffered data will lead to performance issues. +<<<<<<< HEAD +======= +### Hundred Continue Options + +`fs.azure.account.expect.header.enabled`: This configuration parameter is used +to specify whether you wish to send a expect 100 continue header with each +append request or not. It is configured to true by default. This flag configures +the client to check with the Azure store before uploading a block of data from +an output stream. This allows the client to throttle back gracefully -before +actually attempting to upload the block. In experiments this provides +significant throughput improvements under heavy load. For more information : +- https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Expect + + +### Account level throttling Options + +`fs.azure.account.operation.idle.timeout`: This value specifies the time after which the timer for the analyzer (read or +write) should be paused until no new request is made again. The default value for the same is 60 seconds. + +>>>>>>> c88011c6046... HADOOP-18146: ABFS: Added changes for expect hundred continue header (#4039) ### HNS Check Options Config `fs.azure.account.hns.enabled` provides an option to specify whether the storage account is HNS enabled or not. In case the config is not provided, @@ -874,6 +894,9 @@ when there are too many writes from the same process. time. Effectively this will be the threadpool size within the AbfsOutputStream instance. 
Set the value in between 1 to 8 both inclusive. +`fs.azure.analysis.period`: The time after which sleep duration is recomputed after analyzing metrics. The default value +for the same is 10 seconds. + `fs.azure.write.max.requests.to.queue`: To set the maximum write requests that can be queued. Memory consumption of AbfsOutputStream instance can be tuned with this config considering each queued request holds a buffer. Set diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java index 932b7638a48481..cca2a94add2bc5 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java @@ -25,6 +25,7 @@ import java.util.UUID; import java.util.concurrent.Callable; +import org.apache.hadoop.fs.azurebfs.oauth2.AccessTokenProvider; import org.apache.hadoop.fs.azurebfs.services.AbfsClient; import org.apache.hadoop.fs.azurebfs.services.PrefixMode; import org.junit.After; @@ -42,6 +43,7 @@ import org.apache.hadoop.fs.azurebfs.security.AbfsDelegationTokenManager; import org.apache.hadoop.fs.azurebfs.services.AbfsOutputStream; import org.apache.hadoop.fs.azurebfs.services.AuthType; +import org.apache.hadoop.fs.azurebfs.services.ITestAbfsClient; import org.apache.hadoop.fs.azure.AzureNativeFileSystemStore; import org.apache.hadoop.fs.azure.NativeAzureFileSystem; import org.apache.hadoop.fs.azure.metrics.AzureFileSystemInstrumentation; @@ -73,7 +75,7 @@ public abstract class AbstractAbfsIntegrationTest extends AbstractAbfsTestWithTimeout { private static final Logger LOG = - LoggerFactory.getLogger(AbstractAbfsIntegrationTest.class); + LoggerFactory.getLogger(AbstractAbfsIntegrationTest.class); private boolean isIPAddress; private NativeAzureFileSystem wasb; @@ 
-110,7 +112,7 @@ protected AbstractAbfsIntegrationTest() throws Exception { if (authType == AuthType.SharedKey) { assumeTrue("Not set: " + FS_AZURE_ACCOUNT_KEY, - abfsConfig.get(FS_AZURE_ACCOUNT_KEY) != null); + abfsConfig.get(FS_AZURE_ACCOUNT_KEY) != null); // Update credentials } else { assumeTrue("Not set: " + FS_AZURE_ACCOUNT_TOKEN_PROVIDER_TYPE_PROPERTY_NAME, @@ -243,6 +245,9 @@ public Hashtable call() throws Exception { } } + public AccessTokenProvider getAccessTokenProvider(final AzureBlobFileSystem fs) { + return ITestAbfsClient.getAccessTokenProvider(fs.getAbfsStore().getClient()); + } public void loadConfiguredFileSystem() throws Exception { // disable auto-creation of filesystem @@ -506,4 +511,30 @@ protected long assertAbfsStatistics(AbfsStatistic statistic, (long) metricMap.get(statistic.getStatName())); return expectedValue; } + + /** + * For creating directory with implicit parents. Doesn't change already explicit + * parents. + */ + void createAzCopyDirectory(Path path) throws Exception { + AzcopyHelper azcopyHelper = new AzcopyHelper( + getAccountName(), getFileSystemName(), getFileSystem().getAbfsStore() + .getAbfsConfiguration() + .getRawConfiguration(), getFileSystem().getAbfsStore().getPrefixMode()); + azcopyHelper.createFolderUsingAzcopy( + getFileSystem().makeQualified(path).toUri().getPath().substring(1)); + } + + /** + * For creating files with implicit parents. Doesn't change already explicit + * parents. 
+ */ + void createAzCopyFile(Path path) throws Exception { + AzcopyHelper azcopyHelper = new AzcopyHelper(getAccountName(), + getFileSystemName(), getFileSystem().getAbfsStore() + .getAbfsConfiguration() + .getRawConfiguration(), getFileSystem().getAbfsStore().getPrefixMode()); + azcopyHelper.createFileUsingAzcopy( + getFileSystem().makeQualified(path).toUri().getPath().substring(1)); + } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/BlobDirectoryStateHelper.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/BlobDirectoryStateHelper.java index e4fa38e365ec32..dad4551f65dbdb 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/BlobDirectoryStateHelper.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/BlobDirectoryStateHelper.java @@ -18,7 +18,6 @@ package org.apache.hadoop.fs.azurebfs; -import java.io.FileNotFoundException; import java.io.IOException; import java.net.HttpURLConnection; import java.util.List; @@ -29,7 +28,6 @@ import org.mockito.Mockito; -import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; import org.apache.hadoop.fs.azurebfs.services.BlobProperty; diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsRestOperationException.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsRestOperationException.java index 98ce66d4cbba6a..b45940f1549ca3 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsRestOperationException.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsRestOperationException.java @@ -18,6 +18,7 @@ package org.apache.hadoop.fs.azurebfs; +import java.io.FileNotFoundException; import 
java.io.IOException; import org.assertj.core.api.Assertions; @@ -31,6 +32,8 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azurebfs.services.PrefixMode; + import org.mockito.Mockito; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.DOT; @@ -63,7 +66,12 @@ public void testAbfsRestOperationExceptionFormat() throws IOException { Assert.assertEquals(4, errorFields.length); // Check status message, status code, HTTP Request Type and URL. - Assert.assertEquals("Operation failed: \"The specified path does not exist.\"", errorFields[0].trim()); + if (fs.getAbfsStore().getPrefixMode() == PrefixMode.BLOB) { + Assert.assertEquals("Operation failed: \"The specified blob does not exist.\"", errorFields[0].trim()); + } + else { + Assert.assertEquals("Operation failed: \"The specified path does not exist.\"", errorFields[0].trim()); + } Assert.assertEquals("404", errorFields[1].trim()); Assert.assertEquals("HEAD", errorFields[2].trim()); Assert.assertTrue(errorFields[3].trim().startsWith("http")); @@ -79,7 +87,12 @@ public void testAbfsRestOperationExceptionFormat() throws IOException { if (!getAbfsStore(fs).getAbfsConfiguration().enableAbfsListIterator()) { Assert.assertEquals(6, errorFields.length); // Check status message, status code, HTTP Request Type and URL. 
- Assert.assertEquals("Operation failed: \"The specified path does not exist.\"", errorFields[0].trim()); + if (fs.getAbfsStore().getPrefixMode() == PrefixMode.BLOB) { + Assert.assertEquals("Operation failed: \"The specified blob does not exist.\"", errorFields[0].trim()); + } + else { + Assert.assertEquals("Operation failed: \"The specified path does not exist.\"", errorFields[0].trim()); + } Assert.assertEquals("404", errorFields[1].trim()); Assert.assertEquals("GET", errorFields[2].trim()); Assert.assertTrue(errorFields[3].trim().startsWith("http")); @@ -90,7 +103,12 @@ public void testAbfsRestOperationExceptionFormat() throws IOException { } else { Assert.assertEquals(4, errorFields.length); // Check status message, status code, HTTP Request Type and URL. - Assert.assertEquals("Operation failed: \"The specified path does not exist.\"", errorFields[0].trim()); + if (fs.getAbfsStore().getPrefixMode() == PrefixMode.BLOB) { + Assert.assertEquals("Operation failed: \"The specified blob does not exist.\"", errorFields[0].trim()); + } + else { + Assert.assertEquals("Operation failed: \"The specified path does not exist.\"", errorFields[0].trim()); + } Assert.assertEquals("404", errorFields[1].trim()); Assert.assertEquals("HEAD", errorFields[2].trim()); Assert.assertTrue(errorFields[3].trim().startsWith("http")); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemAppend.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemAppend.java index fd4f8d4622e075..a6202f54378d4f 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemAppend.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemAppend.java @@ -33,12 +33,13 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; +import 
org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; import org.apache.hadoop.fs.azurebfs.services.AbfsClient; import org.apache.hadoop.fs.azurebfs.services.AbfsOutputStream; import org.apache.hadoop.fs.azurebfs.services.OperativeEndpoint; import org.apache.hadoop.fs.azurebfs.services.PrefixMode; -import org.apache.hadoop.fs.azurebfs.services.TestAbfsClient; +import org.apache.hadoop.fs.azurebfs.services.ITestAbfsClient; import org.apache.hadoop.fs.azurebfs.utils.TracingContext; import org.junit.Assume; import org.junit.Test; @@ -242,6 +243,19 @@ public void testCreateEmptyBlob() throws IOException { Mockito.any(TracingContext.class)); } + @Test + public void testAppendImplicitDirectoryAzcopy() throws Exception { + Assume.assumeTrue(getFileSystem().getAbfsStore().getPrefixMode() == PrefixMode.BLOB); + AzureBlobFileSystem fs = getFileSystem(); + createAzCopyDirectory(new Path("/src")); + createAzCopyFile(new Path("/src/file")); + intercept(AbfsRestOperationException.class, () -> { + fs.getAbfsStore().getBlobProperty(new Path("/src"), Mockito.mock( + TracingContext.class)); + }); + intercept(FileNotFoundException.class, () -> fs.append(new Path("/src"))); + } + /** * Verify that no calls to getBlockList were made. 
*/ @@ -252,7 +266,7 @@ public void testCreateNonEmptyBlob() throws IOException { AzureBlobFileSystemStore store = Mockito.spy(fs.getAbfsStore()); Mockito.doReturn(store).when(fs).getAbfsStore(); AbfsClient client = store.getClient(); - AbfsClient testClient = Mockito.spy(TestAbfsClient.createTestClientFromCurrentContext( + AbfsClient testClient = Mockito.spy(ITestAbfsClient.createTestClientFromCurrentContext( client, fs.getAbfsStore().getAbfsConfiguration())); store.setClient(testClient); @@ -277,7 +291,7 @@ public void testValidateGetBlockList() throws Exception { AzureBlobFileSystemStore store = Mockito.spy(fs.getAbfsStore()); Mockito.doReturn(store).when(fs).getAbfsStore(); AbfsClient client = store.getClient(); - AbfsClient testClient = Mockito.spy(TestAbfsClient.createTestClientFromCurrentContext( + AbfsClient testClient = Mockito.spy(ITestAbfsClient.createTestClientFromCurrentContext( client, fs.getAbfsStore().getAbfsConfiguration())); store.setClient(testClient); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemAttributes.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemAttributes.java index beb7d0ebaaa8ea..e1eb315bc73bed 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemAttributes.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemAttributes.java @@ -19,6 +19,8 @@ package org.apache.hadoop.fs.azurebfs; import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; import java.util.EnumSet; import org.junit.Assume; @@ -26,8 +28,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.XAttrSetFlag; -import org.apache.hadoop.fs.azurebfs.constants.FSOperationType; -import org.apache.hadoop.fs.azurebfs.utils.TracingHeaderValidator; +import org.apache.hadoop.fs.azurebfs.services.PrefixMode; import 
static org.apache.hadoop.test.LambdaTestUtils.intercept; @@ -42,70 +43,218 @@ public ITestAzureBlobFileSystemAttributes() throws Exception { super(); } + /** + * Test GetXAttr() and SetXAttr() with Unicode Attribute Values. + * DFS does not support Unicode characters in user-defined metadata properties. + * Blob Endpoint supports Unicode encoded in UTF_8 character encoding. + * @throws Exception + */ @Test - public void testSetGetXAttr() throws Exception { + public void testGetSetXAttr() throws Exception { AzureBlobFileSystem fs = getFileSystem(); - AbfsConfiguration conf = fs.getAbfsStore().getAbfsConfiguration(); - Assume.assumeTrue(getIsNamespaceEnabled(fs)); - - byte[] attributeValue1 = fs.getAbfsStore().encodeAttribute("hi"); - byte[] attributeValue2 = fs.getAbfsStore().encodeAttribute("你好"); - String attributeName1 = "user.asciiAttribute"; - String attributeName2 = "user.unicodeAttribute"; - Path testFile = path("setGetXAttr"); - - // after creating a file, the xAttr should not be present - touch(testFile); - assertNull(fs.getXAttr(testFile, attributeName1)); - - // after setting the xAttr on the file, the value should be retrievable - fs.registerListener( - new TracingHeaderValidator(conf.getClientCorrelationId(), - fs.getFileSystemId(), FSOperationType.SET_ATTR, true, 0)); - fs.setXAttr(testFile, attributeName1, attributeValue1); - fs.setListenerOperation(FSOperationType.GET_ATTR); - assertArrayEquals(attributeValue1, fs.getXAttr(testFile, attributeName1)); - fs.registerListener(null); - - // after setting a second xAttr on the file, the first xAttr values should not be overwritten - fs.setXAttr(testFile, attributeName2, attributeValue2); - assertArrayEquals(attributeValue1, fs.getXAttr(testFile, attributeName1)); - assertArrayEquals(attributeValue2, fs.getXAttr(testFile, attributeName2)); + final Path path = new Path("a/b"); + fs.create(path); + + String attributeName1 = "user.attribute1"; + String attributeName2 = "user.attribute2"; + String 
decodedAttributeValue1; + String decodedAttributeValue2; + byte[] attributeValue1; + byte[] attributeValue2; + + if(fs.getAbfsStore().getPrefixMode() == PrefixMode.BLOB) { + Assume.assumeTrue(!getIsNamespaceEnabled(fs)); // Blob endpoint Currently Supports FNS only + decodedAttributeValue1 = "hi"; + decodedAttributeValue2 = "hello"; //Блюз //你好 + // TODO: Modify them to unicode characters when support is added + attributeValue1 = fs.getAbfsStore().encodeAttribute(decodedAttributeValue1); + attributeValue2 = fs.getAbfsStore().encodeAttribute(decodedAttributeValue2); + } + else { + decodedAttributeValue1 = "hi"; + decodedAttributeValue2 = "hello"; // DFS Endpoint only Supports ASCII + attributeValue1 = fs.getAbfsStore().encodeAttribute(decodedAttributeValue1); + attributeValue2 = fs.getAbfsStore().encodeAttribute(decodedAttributeValue2); + } + + // Attribute not present initially + assertNull(fs.getXAttr(path, attributeName1)); + assertNull(fs.getXAttr(path, attributeName2)); + + // Set the Attributes + fs.setXAttr(path, attributeName1, attributeValue1); + + // Check if the attribute is retrievable + byte[] rv = fs.getXAttr(path, attributeName1); + assertTrue(Arrays.equals(rv, attributeValue1)); + assertEquals(new String(rv, StandardCharsets.UTF_8), decodedAttributeValue1); + + // Set the second Attribute + fs.setXAttr(path, attributeName2, attributeValue2); + + // Check all the attributes present and previous Attribute not overridden + rv = fs.getXAttr(path, attributeName1); + assertTrue(Arrays.equals(rv, attributeValue1)); + assertEquals(new String(rv, StandardCharsets.UTF_8), decodedAttributeValue1); + rv = fs.getXAttr(path, attributeName2); + assertTrue(Arrays.equals(rv, attributeValue2)); + assertEquals(new String(rv, StandardCharsets.UTF_8), decodedAttributeValue2); + } + + @Test + public void testGetXAttrOnImplicitPath() throws Exception { + final AzureBlobFileSystem fs = getFileSystem(); + AzcopyHelper azcopyHelper = new AzcopyHelper( + getAccountName(), + 
getFileSystemName(), + getRawConfiguration(), + fs.getAbfsStore().getPrefixMode() + ); + + Path testPath = new Path("a/b"); + azcopyHelper.createFolderUsingAzcopy(fs.makeQualified(testPath).toUri().getPath().substring(1)); + + assertTrue("Path is implicit.", + BlobDirectoryStateHelper.isImplicitDirectory(testPath, fs)); + + String attributeName1 = "user.attribute1"; + assertNull(fs.getXAttr(testPath, attributeName1)); + } + + /** + * Trying to set same attribute multiple times should result in no failure + * @throws Exception + */ + @Test + public void testSetXAttrMultipleOperations() throws Exception { + AzureBlobFileSystem fs = getFileSystem(); + final Path path = new Path("a/b"); + fs.create(path); + + String attributeName1 = "user.attribute1"; + byte[] attributeValue1; + String decodedAttributeValue1 = "hi"; + + if(fs.getAbfsStore().getPrefixMode() == PrefixMode.BLOB) { + Assume.assumeTrue(!getIsNamespaceEnabled(fs)); + // TODO: Modify them to unicode characters when support is added + attributeValue1 = fs.getAbfsStore().encodeAttribute("hi"); + } + else { + attributeValue1 = fs.getAbfsStore().encodeAttribute("hi"); + } + + // Attribute not present initially + assertNull(fs.getXAttr(path, attributeName1)); + + // Set the Attributes Multiple times + // Filesystem internally adds create and replace flags + fs.setXAttr(path, attributeName1, attributeValue1); + fs.setXAttr(path, attributeName1, attributeValue1); + + // Check if the attribute is retrievable + byte[] rv = fs.getXAttr(path, attributeName1); + assertTrue(Arrays.equals(rv, attributeValue1)); + assertEquals(new String(rv, StandardCharsets.UTF_8), decodedAttributeValue1); + } + + /** + * Test that setting metadata over marker blob do not override + * x-ms-meta-hdi_IsFolder + * TODO: Confirm Expected Behavior + * @throws Exception + */ + @Test + public void testSetXAttrOverMarkerBlob() throws Exception { + AzureBlobFileSystem fs = getFileSystem(); + final Path path = new Path("a/b"); + fs.mkdirs(path); + + 
assertTrue(BlobDirectoryStateHelper.isExplicitDirectory(path, fs)); + + String attributeName1 = "user.attribute1"; + byte[] attributeValue1; + + if(fs.getAbfsStore().getPrefixMode() == PrefixMode.BLOB) { + Assume.assumeTrue(!getIsNamespaceEnabled(fs)); + // TODO: Modify them to unicode characters when support is added + attributeValue1 = fs.getAbfsStore().encodeAttribute("hi"); + } + else { + attributeValue1 = fs.getAbfsStore().encodeAttribute("hi"); + } + + // Attribute not present initially + assertNull(fs.getXAttr(path, attributeName1)); + + // Set the Attribute on marker blob + fs.setXAttr(path, attributeName1, attributeValue1); + + // Check if the attribute is retrievable + byte[] rv = fs.getXAttr(path, attributeName1); + assertTrue(Arrays.equals(rv, attributeValue1)); + + // Check if Marker blob still exists as marker. + assertTrue(BlobDirectoryStateHelper.isExplicitDirectory(path, fs)); } @Test public void testSetGetXAttrCreateReplace() throws Exception { AzureBlobFileSystem fs = getFileSystem(); - Assume.assumeTrue(getIsNamespaceEnabled(fs)); - byte[] attributeValue = fs.getAbfsStore().encodeAttribute("one"); - String attributeName = "user.someAttribute"; - Path testFile = path("createReplaceXAttr"); + final Path testFile = new Path("a/b"); + + String attributeName = "user.attribute1"; + String decodedAttributeValue1; + byte[] attributeValue; + + if(fs.getAbfsStore().getPrefixMode() == PrefixMode.BLOB) { + Assume.assumeTrue(!getIsNamespaceEnabled(fs)); // Blob endpoint Currently Supports FNS only + decodedAttributeValue1 = "hi"; + attributeValue = decodedAttributeValue1.getBytes(StandardCharsets.UTF_8); + } + else { + decodedAttributeValue1 = "hi"; + attributeValue = fs.getAbfsStore().encodeAttribute(decodedAttributeValue1); + } // after creating a file, it must be possible to create a new xAttr - touch(testFile); + fs.create(testFile); fs.setXAttr(testFile, attributeName, attributeValue, CREATE_FLAG); assertArrayEquals(attributeValue, fs.getXAttr(testFile, 
attributeName)); - // however after the xAttr is created, creating it again must fail + // however, after the xAttr is created, creating it again must fail intercept(IOException.class, () -> fs.setXAttr(testFile, attributeName, attributeValue, CREATE_FLAG)); } @Test public void testSetGetXAttrReplace() throws Exception { AzureBlobFileSystem fs = getFileSystem(); - Assume.assumeTrue(getIsNamespaceEnabled(fs)); - byte[] attributeValue1 = fs.getAbfsStore().encodeAttribute("one"); - byte[] attributeValue2 = fs.getAbfsStore().encodeAttribute("two"); - String attributeName = "user.someAttribute"; - Path testFile = path("replaceXAttr"); + final Path testFile = new Path("a/b"); + + String attributeName = "user.attribute1"; + String decodedAttributeValue1 = "one"; + String decodedAttributeValue2 = "two"; + + byte[] attributeValue1; + byte[] attributeValue2; + + if(fs.getAbfsStore().getPrefixMode() == PrefixMode.BLOB) { + Assume.assumeTrue(!getIsNamespaceEnabled(fs)); // Blob endpoint Currently Supports FNS only + attributeValue1 = decodedAttributeValue1.getBytes(StandardCharsets.UTF_8); + attributeValue2 = decodedAttributeValue2.getBytes(StandardCharsets.UTF_8); + } + else { + attributeValue1 = fs.getAbfsStore().encodeAttribute(decodedAttributeValue1); + attributeValue2 = fs.getAbfsStore().encodeAttribute(decodedAttributeValue2); + } // after creating a file, it must not be possible to replace an xAttr intercept(IOException.class, () -> { - touch(testFile); + fs.create(testFile); fs.setXAttr(testFile, attributeName, attributeValue1, REPLACE_FLAG); }); - // however after the xAttr is created, replacing it must succeed + // however, after the xAttr is created, replacing it must succeed fs.setXAttr(testFile, attributeName, attributeValue1, CREATE_FLAG); fs.setXAttr(testFile, attributeName, attributeValue2, REPLACE_FLAG); assertArrayEquals(attributeValue2, fs.getXAttr(testFile, attributeName)); diff --git 
a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java index eaf2f94269ed60..122b9ab36d81db 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java @@ -61,7 +61,7 @@ import org.apache.hadoop.fs.azurebfs.services.AbfsClient; import org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation; import org.apache.hadoop.fs.azurebfs.services.AbfsHttpOperation; -import org.apache.hadoop.fs.azurebfs.services.TestAbfsClient; +import org.apache.hadoop.fs.azurebfs.services.ITestAbfsClient; import org.apache.hadoop.fs.azurebfs.utils.TracingContext; import org.apache.hadoop.fs.azurebfs.utils.TracingHeaderValidator; import org.mockito.Mockito; @@ -75,6 +75,7 @@ import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_CLIENT_PROVIDED_ENCRYPTION_KEY; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.TRUE; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ENABLE_BLOB_MKDIR_OVERWRITE; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_LEASE_CREATE_NON_RECURSIVE; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_MKDIRS_FALLBACK_TO_DFS; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemUriSchemes.ABFS_DNS_PREFIX; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemUriSchemes.WASB_DNS_PREFIX; @@ -954,7 +955,9 @@ public void testActiveCreateNonRecursiveDenyParallelReadOnAtomicDir() throws Exc Assume.assumeTrue( getFileSystem().getAbfsStore().getAbfsConfiguration().getPrefixMode() == PrefixMode.BLOB); - AzureBlobFileSystem fileSystem = (AzureBlobFileSystem) 
FileSystem.newInstance(getRawConfiguration()); + Configuration configuration = Mockito.spy(getRawConfiguration()); + configuration.set(FS_AZURE_LEASE_CREATE_NON_RECURSIVE, "true"); + AzureBlobFileSystem fileSystem = (AzureBlobFileSystem) FileSystem.newInstance(configuration); AbfsClient client = Mockito.spy(fileSystem.getAbfsClient()); fileSystem.getAbfsStore().setClient(client); fileSystem.setWorkingDirectory(new Path("/")); @@ -994,6 +997,53 @@ public void testActiveCreateNonRecursiveDenyParallelReadOnAtomicDir() throws Exc Assert.assertTrue(fileSystem.exists(new Path("/hbase/dir/file"))); } + @Test + public void testActiveCreateNonRecursiveNotDenyParallelReadOnAtomicDirIfLeaseConfigDisabled() throws Exception { + Assume.assumeTrue( + getFileSystem().getAbfsStore().getAbfsConfiguration().getPrefixMode() + == PrefixMode.BLOB); + Configuration configuration = Mockito.spy(getRawConfiguration()); + AzureBlobFileSystem fileSystem = (AzureBlobFileSystem) FileSystem.newInstance(configuration); + AbfsClient client = Mockito.spy(fileSystem.getAbfsClient()); + fileSystem.getAbfsStore().setClient(client); + fileSystem.setWorkingDirectory(new Path("/")); + fileSystem.mkdirs(new Path("/hbase/dir")); + fileSystem.create(new Path("/hbase/dir/file")); + AtomicBoolean createCalled = new AtomicBoolean(false); + AtomicBoolean parallelRenameDone = new AtomicBoolean(false); + AtomicBoolean exceptionCaught = new AtomicBoolean(false); + + Mockito.doAnswer(answer -> { + AbfsRestOperation op = (AbfsRestOperation) answer.callRealMethod(); + createCalled.set(true); + while(!parallelRenameDone.get()); + return op; + }).when(client).createPathBlob(Mockito.anyString(), Mockito.anyBoolean(), + Mockito.anyBoolean(), Mockito.nullable(HashMap.class), Mockito.nullable(String.class), Mockito.nullable(TracingContext.class)); + + new Thread(() -> { + try { + while(!createCalled.get()); + getFileSystem().rename(new Path("/hbase/dir/"), new Path("/hbase/dir2")); + } catch (Exception e) { + 
exceptionCaught.set(true); + } finally { + parallelRenameDone.set(true); + } + }).start(); + + fileSystem.createFile(new Path("/hbase/dir/file1")) + .overwrite(false) + .replication((short) 1) + .bufferSize(1024) + .blockSize(1024) + .build(); + + Assert.assertFalse(exceptionCaught.get()); + Assert.assertFalse(fileSystem.exists(new Path("/hbase/dir/file"))); + Assert.assertTrue(fileSystem.exists(new Path("/hbase/dir2/file"))); + } + /** * Attempts to use to the ABFS stream after it is closed. */ @@ -1123,9 +1173,15 @@ public void testCreateFileOverwrite(boolean enableConditionalCreateOverwrite) // One request to server to create path should be issued // two calls added for - - // 1. getFileStatus - // 2. actual create call + // 1. getFileStatus : 1 + // 2. actual create call: 1 createRequestCount+=2; + + // In case of blob endpoint getFileStatus makes additional call to check if path is implicit + if (fs.getAbfsStore().getPrefixMode() == PrefixMode.BLOB) { + createRequestCount++; + } + createRequestCount+=ifBlobCheckIfPathDir; assertAbfsStatistics( @@ -1226,7 +1282,7 @@ public void testNegativeScenariosForCreateOverwriteDisabled() // Get mock AbfsClient with current config AbfsClient mockClient - = TestAbfsClient.getMockAbfsClient( + = ITestAbfsClient.getMockAbfsClient( fs.getAbfsStore().getClient(), fs.getAbfsStore().getAbfsConfiguration()); @@ -1286,6 +1342,13 @@ public void testNegativeScenariosForCreateOverwriteDisabled() .when(mockClient) .getPathStatus(any(String.class), eq(false), any(TracingContext.class)); + doThrow(fileNotFoundResponseEx) // Scn1: GFS fails with Http404 + .doThrow(serverErrorResponseEx) // Scn2: GFS fails with Http500 + .doReturn(successOp) // Scn3: create overwrite=true fails with Http412 + .doReturn(successOp) // Scn4: create overwrite=true fails with Http500 + .when(mockClient) + .getBlobProperty(any(Path.class), any(TracingContext.class)); + // mock for overwrite=true doThrow( preConditionResponseEx) // Scn3: create overwrite=true 
fails with Http412 @@ -1432,7 +1495,12 @@ private String extractFileEtag(String fileName) throws IOException { final AzureBlobFileSystem fs = getFileSystem(); final AbfsClient client = fs.getAbfsClient(); final TracingContext testTracingContext = getTestTracingContext(fs, false); - AbfsRestOperation op = client.getPathStatus(fileName, true, testTracingContext); + AbfsRestOperation op; + if (fs.getAbfsStore().getPrefixMode() == PrefixMode.BLOB) { + op = client.getBlobProperty(new Path(fileName), testTracingContext); + } else { + op = client.getPathStatus(fileName, true, testTracingContext); + } return AzureBlobFileSystemStore.extractEtagHeader(op.getResult()); } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelete.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelete.java index bc18c85a9e711c..a2549aa17b5113 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelete.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelete.java @@ -38,7 +38,7 @@ import org.apache.hadoop.fs.azurebfs.services.AbfsClient; import org.apache.hadoop.fs.azurebfs.services.AbfsHttpOperation; import org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation; -import org.apache.hadoop.fs.azurebfs.services.TestAbfsClient; +import org.apache.hadoop.fs.azurebfs.services.ITestAbfsClient; import org.apache.hadoop.fs.azurebfs.services.TestAbfsPerfTracker; import org.apache.hadoop.fs.azurebfs.utils.TestMockHelpers; import org.apache.hadoop.fs.azurebfs.utils.TracingContext; @@ -195,7 +195,7 @@ public void testDeleteIdempotency() throws Exception { final AzureBlobFileSystem fs = getFileSystem(); AbfsClient abfsClient = fs.getAbfsStore().getClient(); - AbfsClient testClient = TestAbfsClient.createTestClientFromCurrentContext( + AbfsClient testClient = 
ITestAbfsClient.createTestClientFromCurrentContext( abfsClient, abfsConfig); @@ -242,7 +242,7 @@ public void testDeleteIdempotency() throws Exception { public void testDeleteIdempotencyTriggerHttp404() throws Exception { final AzureBlobFileSystem fs = getFileSystem(); - AbfsClient client = TestAbfsClient.createTestClientFromCurrentContext( + AbfsClient client = ITestAbfsClient.createTestClientFromCurrentContext( fs.getAbfsStore().getClient(), this.getConfiguration()); @@ -261,7 +261,7 @@ public void testDeleteIdempotencyTriggerHttp404() throws Exception { getTestTracingContext(fs, true))); // mock idempotency check to mimic retried case - AbfsClient mockClient = TestAbfsClient.getMockAbfsClient( + AbfsClient mockClient = ITestAbfsClient.getMockAbfsClient( fs.getAbfsStore().getClient(), this.getConfiguration()); AzureBlobFileSystemStore mockStore = mock(AzureBlobFileSystemStore.class); @@ -276,10 +276,10 @@ public void testDeleteIdempotencyTriggerHttp404() throws Exception { // Case 2: Mimic retried case // Idempotency check on Delete always returns success - AbfsRestOperation idempotencyRetOp = TestAbfsClient.getRestOp( + AbfsRestOperation idempotencyRetOp = ITestAbfsClient.getRestOp( DeletePath, mockClient, HTTP_METHOD_DELETE, - TestAbfsClient.getTestUrl(mockClient, "/NonExistingPath"), - TestAbfsClient.getTestRequestHeaders(mockClient)); + ITestAbfsClient.getTestUrl(mockClient, "/NonExistingPath"), + ITestAbfsClient.getTestRequestHeaders(mockClient)); idempotencyRetOp.hardSetResult(HTTP_OK); doReturn(idempotencyRetOp).when(mockClient).deleteIdempotencyCheckOp(any()); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemExplictImplicitRename.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemExplictImplicitRename.java index 8ffbbadf13a18d..44f2f1b9c94754 100644 --- 
a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemExplictImplicitRename.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemExplictImplicitRename.java @@ -49,32 +49,6 @@ public void setup() throws Exception { == PrefixMode.BLOB); } - /** - * For creating directory with implicit parents. Doesn't change already explicit - * parents. - */ - void createAzCopyDirectory(Path path) throws Exception { - AzcopyHelper azcopyHelper = new AzcopyHelper( - getAccountName(), getFileSystemName(), getFileSystem().getAbfsStore() - .getAbfsConfiguration() - .getRawConfiguration(), getFileSystem().getAbfsStore().getPrefixMode()); - azcopyHelper.createFolderUsingAzcopy( - getFileSystem().makeQualified(path).toUri().getPath().substring(1)); - } - - /** - * For creating files with implicit parents. Doesn't change already explicit - * parents. - */ - void createAzCopyFile(Path path) throws Exception { - AzcopyHelper azcopyHelper = new AzcopyHelper(getAccountName(), - getFileSystemName(), getFileSystem().getAbfsStore() - .getAbfsConfiguration() - .getRawConfiguration(), getFileSystem().getAbfsStore().getPrefixMode()); - azcopyHelper.createFileUsingAzcopy( - getFileSystem().makeQualified(path).toUri().getPath().substring(1)); - } - @Test public void testRenameSrcFileInImplicitParentDirectory() throws Exception { AzureBlobFileSystem fs = getFileSystem(); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemFileStatus.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemFileStatus.java index b9498be89a1ffe..4136da519ec913 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemFileStatus.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemFileStatus.java @@ -27,6 +27,8 @@ import 
org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsPermission; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; + /** * Test FileStatus. */ @@ -140,4 +142,158 @@ public void testLastModifiedTime() throws IOException { assertTrue("lastModifiedTime should be before createEndTime", createEndTime > lastModifiedTime); } + + @Test + public void testFileStatusOnFileWithImplicitParent() throws Exception { + final AzureBlobFileSystem fs = getFileSystem(); + AzcopyHelper azcopyHelper = new AzcopyHelper( + getAccountName(), + getFileSystemName(), + getRawConfiguration(), + fs.getAbfsStore().getPrefixMode() + ); + + Path testPath = new Path("a/b.txt"); + azcopyHelper.createFileUsingAzcopy(fs.makeQualified(testPath).toUri().getPath().substring(1)); + + assertTrue("Parent directory is implicit.", + BlobDirectoryStateHelper.isImplicitDirectory(testPath.getParent(), fs)); + + // Assert getFileStatus Succeed on path + FileStatus fileStatus = fs.getFileStatus(testPath); + assertNotNull(fileStatus.getPath()); + assertFalse(fileStatus.isDirectory()); + assertNotEquals(0L, fileStatus.getLen()); + } + + @Test + public void testFileStatusOnFileWithExplicitParent() throws Exception { + final AzureBlobFileSystem fs = getFileSystem(); + Path testPath = new Path("a/b.txt"); + fs.create(testPath); + + assertTrue("Parent directory is explicit.", + BlobDirectoryStateHelper.isExplicitDirectory(testPath.getParent(), fs)); + + FileStatus fileStatus = fs.getFileStatus(testPath); + assertNotNull(fileStatus.getPath()); + assertFalse(fileStatus.isDirectory()); + } + + @Test + public void testFileStatusOnImplicitDirWithImplicitParent() throws Exception { + final AzureBlobFileSystem fs = getFileSystem(); + AzcopyHelper azcopyHelper = new AzcopyHelper( + getAccountName(), + getFileSystemName(), + getRawConfiguration(), + fs.getAbfsStore().getPrefixMode() + ); + + Path testPath = new Path("a/b"); + 
azcopyHelper.createFolderUsingAzcopy(fs.makeQualified(testPath).toUri().getPath().substring(1)); + + assertTrue("Path is implicit.", + BlobDirectoryStateHelper.isImplicitDirectory(testPath, fs)); + assertTrue("Parent directory is implicit.", + BlobDirectoryStateHelper.isImplicitDirectory(testPath.getParent(), fs)); + + // Assert that getFileStatus succeeds + FileStatus fileStatus = fs.getFileStatus(testPath); + assertNotNull(fileStatus.getPath()); + assertTrue(fileStatus.isDirectory()); + assertEquals(0L, fileStatus.getLen()); + } + + @Test + public void testFileStatusOnImplicitDirWithExplicitParent() throws Exception { + final AzureBlobFileSystem fs = getFileSystem(); + AzcopyHelper azcopyHelper = new AzcopyHelper( + getAccountName(), + getFileSystemName(), + getRawConfiguration(), + fs.getAbfsStore().getPrefixMode() + ); + + Path testPath = new Path("a/b"); + azcopyHelper.createFolderUsingAzcopy(fs.makeQualified(testPath).toUri().getPath().substring(1)); + fs.mkdirs(testPath.getParent()); + + assertTrue("Path is implicit.", + BlobDirectoryStateHelper.isImplicitDirectory(testPath, fs)); + assertTrue("Parent directory is explicit.", + BlobDirectoryStateHelper.isExplicitDirectory(testPath.getParent(), fs)); + + // Assert that getFileStatus succeeds + FileStatus fileStatus = fs.getFileStatus(testPath); + assertNotNull(fileStatus.getPath()); + assertTrue(fileStatus.isDirectory()); + assertEquals(0L, fileStatus.getLen()); + } + + @Test + public void testFileStatusOnExplicitDirWithExplicitParent() throws Exception { + final AzureBlobFileSystem fs = getFileSystem(); + Path testPath = new Path("a/b"); + fs.mkdirs(testPath); + + assertTrue("Parent directory is explicit.", + BlobDirectoryStateHelper.isExplicitDirectory(testPath.getParent(), fs)); + assertTrue("Path is explicit.", + BlobDirectoryStateHelper.isExplicitDirectory(testPath, fs)); + + // Assert that getFileStatus Succeeds + FileStatus fileStatus = fs.getFileStatus(testPath); + assertNotNull(fileStatus.getPath()); 
+ assertTrue(fileStatus.isDirectory()); + assertEquals(0L, fileStatus.getLen()); + } + + @Test + public void testFileStatusOnNonExistingPathWithExplicitParent() throws Exception { + final AzureBlobFileSystem fs = getFileSystem(); + Path testPath = new Path("a/b.txt"); + fs.mkdirs(testPath.getParent()); + + assertTrue("Parent directory is explicit.", + BlobDirectoryStateHelper.isExplicitDirectory(testPath.getParent(), fs)); + + // assert that getFileStatus fails + intercept(IOException.class, + () -> fs.getFileStatus(testPath)); + } + + @Test + public void testFileStatusOnNonExistingPathWithImplicitParent() throws Exception { + final AzureBlobFileSystem fs = getFileSystem(); + AzcopyHelper azcopyHelper = new AzcopyHelper( + getAccountName(), + getFileSystemName(), + getRawConfiguration(), + fs.getAbfsStore().getPrefixMode() + ); + + Path testPath = new Path("a/b.txt"); + azcopyHelper.createFolderUsingAzcopy(fs.makeQualified( + testPath.getParent()).toUri().getPath().substring(1)); + + assertTrue("Parent directory is implicit.", + BlobDirectoryStateHelper.isImplicitDirectory(testPath.getParent(), fs)); + + // assert that getFileStatus Fails with IOException + intercept(IOException.class, + () -> fs.getFileStatus(testPath)); + } + + @Test + public void testFileStatusOnRoot() throws Exception { + final AzureBlobFileSystem fs = getFileSystem(); + final Path path = new Path("/"); + fs.setWorkingDirectory(new Path("/")); + + // Assert that getFileStatus on root path succeeds. 
+ FileStatus fileStatus = fs.getFileStatus(path); + assertTrue(fileStatus.isDirectory()); + assertTrue(fileStatus.getLen() == 0L); + } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemLease.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemLease.java index 2ebdc7492a18d6..14e9df12f47038 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemLease.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemLease.java @@ -20,6 +20,8 @@ import java.io.IOException; import java.util.concurrent.RejectedExecutionException; +import org.apache.hadoop.fs.azurebfs.services.AbfsBlobLease; +import org.apache.hadoop.fs.azurebfs.services.AbfsDfsLease; import org.apache.hadoop.fs.azurebfs.services.OperativeEndpoint; import org.apache.hadoop.fs.azurebfs.services.PrefixMode; import org.junit.Assert; @@ -347,8 +349,14 @@ public void testAcquireRetry() throws Exception { FSOperationType.TEST_OP, true, 0); tracingContext.setListener(listener); - AbfsLease lease = new AbfsLease(fs.getAbfsClient(), - testFilePath.toUri().getPath(), tracingContext); + AbfsLease lease; + if(getPrefixMode(fs) == PrefixMode.BLOB) { + lease = new AbfsBlobLease(fs.getAbfsClient(), + testFilePath.toUri().getPath(), null, tracingContext); + } else { + lease = new AbfsDfsLease(fs.getAbfsClient(), + testFilePath.toUri().getPath(), null, tracingContext); + } Assert.assertNotNull("Did not successfully lease file", lease.getLeaseID()); listener.setOperation(FSOperationType.RELEASE_LEASE); lease.free(); @@ -362,7 +370,18 @@ public void testAcquireRetry() throws Exception { .doCallRealMethod().when(mockClient) .acquireLease(anyString(), anyInt(), any(TracingContext.class)); - lease = new AbfsLease(mockClient, testFilePath.toUri().getPath(), 5, 1, tracingContext); + doThrow(new AbfsLease.LeaseException("failed to 
acquire 1")) + .doThrow(new AbfsLease.LeaseException("failed to acquire 2")) + .doCallRealMethod().when(mockClient) + .acquireBlobLease(anyString(), anyInt(), any(TracingContext.class)); + + if(getPrefixMode(fs) == PrefixMode.BLOB) { + lease = new AbfsBlobLease(mockClient, testFilePath.toUri().getPath(), 5, 1, null, + tracingContext); + } else { + lease = new AbfsDfsLease(mockClient, testFilePath.toUri().getPath(), 5, 1, null, + tracingContext); + } Assert.assertNotNull("Acquire lease should have retried", lease.getLeaseID()); lease.free(); Assert.assertEquals("Unexpected acquire retry count", 2, lease.getAcquireRetryCount()); @@ -370,9 +389,17 @@ public void testAcquireRetry() throws Exception { doThrow(new AbfsLease.LeaseException("failed to acquire")).when(mockClient) .acquireLease(anyString(), anyInt(), any(TracingContext.class)); + doThrow(new AbfsLease.LeaseException("failed to acquire")).when(mockClient) + .acquireBlobLease(anyString(), anyInt(), any(TracingContext.class)); + LambdaTestUtils.intercept(AzureBlobFileSystemException.class, () -> { - new AbfsLease(mockClient, testFilePath.toUri().getPath(), 5, 1, - tracingContext); + if(getPrefixMode(fs) == PrefixMode.BLOB) { + new AbfsBlobLease(mockClient, testFilePath.toUri().getPath(), 5, 1, null, + tracingContext); + } else { + new AbfsDfsLease(mockClient, testFilePath.toUri().getPath(), 5, 1, null, + tracingContext); + } }); } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemMkDir.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemMkDir.java index 6bed7839935f7c..acdcf66942ab78 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemMkDir.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemMkDir.java @@ -20,8 +20,12 @@ import java.util.UUID; +import 
org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; +import org.apache.hadoop.fs.azurebfs.services.AbfsClient; import org.apache.hadoop.fs.azurebfs.services.OperativeEndpoint; import org.apache.hadoop.fs.azurebfs.services.PrefixMode; +import org.apache.hadoop.fs.azurebfs.services.ITestAbfsClient; +import org.apache.hadoop.fs.azurebfs.utils.TracingContext; import org.junit.Assume; import org.junit.Test; @@ -29,6 +33,7 @@ import org.apache.hadoop.fs.FileAlreadyExistsException; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.mockito.Mockito; import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.CONNECTIONS_MADE; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ENABLE_MKDIR_OVERWRITE; @@ -152,4 +157,29 @@ public void testCreateDirOverwrite(boolean enableConditionalCreateOverwrite) totalConnectionMadeBeforeTest + mkdirRequestCount, fs.getInstrumentationMap()); } + + @Test + public void testVerifyGetBlobProperty() throws Exception { + Assume.assumeTrue(getFileSystem().getAbfsStore().getPrefixMode() == PrefixMode.BLOB); + AzureBlobFileSystem fs = Mockito.spy(getFileSystem()); + AzureBlobFileSystemStore store = Mockito.spy(fs.getAbfsStore()); + Mockito.doReturn(store).when(fs).getAbfsStore(); + AbfsClient client = store.getClient(); + AbfsClient testClient = Mockito.spy(ITestAbfsClient.createTestClientFromCurrentContext( + client, + fs.getAbfsStore().getAbfsConfiguration())); + store.setClient(testClient); + + createAzCopyDirectory(new Path("/src")); + intercept(AbfsRestOperationException.class, () -> { + store.getBlobProperty(new Path("/src"), Mockito.mock( + TracingContext.class)); + }); + fs.mkdirs(new Path("/src/dir")); + Mockito.verify(testClient, Mockito.times(0)).getPathStatus(Mockito.any(String.class), + Mockito.anyBoolean(), Mockito.any(TracingContext.class)); + Mockito.verify(testClient, Mockito.times(1)).getBlobProperty(Mockito.any(Path.class), + 
Mockito.any(TracingContext.class)); + + } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRandomRead.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRandomRead.java index 0a9713cffa94b5..b4a8582c57d78c 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRandomRead.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRandomRead.java @@ -18,14 +18,20 @@ package org.apache.hadoop.fs.azurebfs; import java.io.EOFException; +import java.io.FileNotFoundException; import java.io.IOException; import java.util.Random; import java.util.concurrent.Callable; import java.util.UUID; +import org.apache.hadoop.fs.azurebfs.services.AbfsClient; +import org.apache.hadoop.fs.azurebfs.services.PrefixMode; +import org.apache.hadoop.fs.azurebfs.services.ITestAbfsClient; +import org.apache.hadoop.fs.azurebfs.utils.TracingContext; import org.junit.Assume; import org.junit.Ignore; import org.junit.Test; +import org.mockito.Mockito; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -574,6 +580,46 @@ public void testAlwaysReadBufferSizeConfig(boolean alwaysReadBufferSizeConfigVal assertAbfsStatistics(BYTES_RECEIVED, dateSizeReadStatAtStart + newDataSizeRead, fs.getInstrumentationMap()); } + @Test + public void testReadBlob() throws IOException { + Assume.assumeTrue(PrefixMode.BLOB == getFileSystem().getAbfsStore().getPrefixMode()); + AzureBlobFileSystem fs = Mockito.spy(getFileSystem()); + AzureBlobFileSystemStore store = Mockito.spy(fs.getAbfsStore()); + AbfsClient client = store.getClient(); + AbfsClient mockClient = Mockito.spy(ITestAbfsClient.createTestClientFromCurrentContext( + client, + fs.getAbfsStore().getAbfsConfiguration() + )); + store.setClient(mockClient); + Mockito.doReturn(mockClient).when(store).getClient(); + 
Mockito.doReturn(store).when(fs).getAbfsStore(); + + Path testPath = new Path("/testReadFile"); + fs.create(testPath); + FSDataInputStream in = fs.open(testPath); + Mockito.verify(mockClient, Mockito.atLeast(1)).getBlobProperty( + Mockito.any(Path.class), Mockito.any(TracingContext.class)); + Mockito.verify(mockClient, Mockito.times(0)).getPathStatus( + Mockito.any(String.class), Mockito.anyBoolean(), Mockito.any(TracingContext.class)); + } + + @Test + public void testInvalidImplicitDirRead() throws Exception { + AzureBlobFileSystem fs = (AzureBlobFileSystem) getFileSystem(); + AzcopyHelper azcopyhelper = new AzcopyHelper(getAccountName(), + getFileSystemName(), + getRawConfiguration(), + fs.getAbfsStore().getPrefixMode()); + String fullPath = "/implicitDirPath/testFile"; + String path = "/implicitDirPath"; + azcopyhelper.createFolderUsingAzcopy( + fs.makeQualified(new Path(fullPath)).toUri().getPath().substring(1) + ); + + intercept(FileNotFoundException.class, () -> + fs.open(new Path(path))); + + } private long sequentialRead(String version, Path testPath, FileSystem fs, diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java index 5057664ab2af17..a3eebf3e9dc9cc 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRename.java @@ -61,6 +61,7 @@ import org.apache.hadoop.fs.azurebfs.utils.TracingContext; import org.apache.hadoop.fs.azurebfs.services.PrefixMode; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_LEASE_CREATE_NON_RECURSIVE; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_REDIRECT_RENAME; import static 
org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.TRUE; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_INGRESS_FALLBACK_TO_DFS; @@ -1735,8 +1736,10 @@ public void testParallelAppendToFileBeingCopiedInAtomicDirectory() @Test public void testParallelCreateNonRecursiveToFilePartOfAtomicDirectoryInRename() throws Exception { - FileSystem fsCreate = FileSystem.newInstance(getRawConfiguration()); - AzureBlobFileSystem fs = (AzureBlobFileSystem) FileSystem.newInstance(getRawConfiguration()); + Configuration configuration = Mockito.spy(getRawConfiguration()); + configuration.set(FS_AZURE_LEASE_CREATE_NON_RECURSIVE, "true"); + FileSystem fsCreate = FileSystem.newInstance(configuration); + AzureBlobFileSystem fs = (AzureBlobFileSystem) FileSystem.newInstance(configuration); assumeNonHnsAccountBlobEndpoint(fs); fs.setWorkingDirectory(new Path("/")); fs.mkdirs(new Path("/hbase/dir1")); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestCustomerProvidedKey.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestCustomerProvidedKey.java index 2c0bd31bf8eeb8..8ef5f1d4516703 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestCustomerProvidedKey.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestCustomerProvidedKey.java @@ -202,7 +202,7 @@ public void testAppendWithCPK() throws Exception { // Trying to append with correct CPK headers AppendRequestParameters appendRequestParameters = new AppendRequestParameters( - 0, 0, 5, Mode.APPEND_MODE, false, null); + 0, 0, 5, Mode.APPEND_MODE, false, null, true); byte[] buffer = getRandomBytesArray(5); AbfsClient abfsClient = fs.getAbfsClient(); AbfsRestOperation abfsRestOperation = abfsClient @@ -247,7 +247,7 @@ public void testAppendWithoutCPK() throws Exception { // Trying to append without CPK headers AppendRequestParameters appendRequestParameters = new 
AppendRequestParameters( - 0, 0, 5, Mode.APPEND_MODE, false, null); + 0, 0, 5, Mode.APPEND_MODE, false, null, true); byte[] buffer = getRandomBytesArray(5); AbfsClient abfsClient = fs.getAbfsClient(); AbfsRestOperation abfsRestOperation = abfsClient diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestGetNameSpaceEnabled.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestGetNameSpaceEnabled.java index ccfdb72cdb0c98..fbfd6884bb363a 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestGetNameSpaceEnabled.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestGetNameSpaceEnabled.java @@ -32,6 +32,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.azurebfs.enums.Trilean; +import org.apache.hadoop.fs.azurebfs.services.PrefixMode; import org.apache.hadoop.fs.azurebfs.utils.TracingContext; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemUriSchemes.ABFS_DNS_PREFIX; @@ -142,11 +143,20 @@ public void testFailedRequestWhenFSNotExist() throws Exception { + testUri.substring(testUri.indexOf("@")); AzureBlobFileSystem fs = this.getFileSystem(nonExistingFsUrl); - intercept(FileNotFoundException.class, - "\"The specified filesystem does not exist.\", 404", - ()-> { - fs.getFileStatus(new Path("/")); // Run a dummy FS call - }); + if (fs.getAbfsStore().getPrefixMode() == PrefixMode.BLOB) { + intercept(FileNotFoundException.class, + "\"The specified container does not exist.\", 404", + ()-> { + fs.getFileStatus(new Path("/")); // Run a dummy FS call + }); + } + else { + intercept(FileNotFoundException.class, + "\"The specified filesystem does not exist.\", 404", + ()-> { + fs.getFileStatus(new Path("/")); // Run a dummy FS call + }); + } } @Test diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestListBlobProducer.java 
b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestListBlobProducer.java index 0093f699ce5110..541e17870931bf 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestListBlobProducer.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestListBlobProducer.java @@ -19,6 +19,12 @@ package org.apache.hadoop.fs.azurebfs; import java.net.HttpURLConnection; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; @@ -34,13 +40,14 @@ import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; import org.apache.hadoop.fs.azurebfs.services.AbfsClient; import org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation; +import org.apache.hadoop.fs.azurebfs.services.BlobProperty; import org.apache.hadoop.fs.azurebfs.services.ListBlobConsumer; import org.apache.hadoop.fs.azurebfs.services.ListBlobProducer; import org.apache.hadoop.fs.azurebfs.services.ListBlobQueue; import org.apache.hadoop.fs.azurebfs.services.PrefixMode; import org.apache.hadoop.fs.azurebfs.utils.TracingContext; -import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_MAX_CONSUMER_LAG; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_PRODUCER_QUEUE_MAX_SIZE; public class ITestListBlobProducer extends AbstractAbfsIntegrationTest { @@ -59,7 +66,7 @@ public void setup() throws Exception { @Test public void testProducerWaitingForConsumerLagToGoDown() throws Exception { Configuration configuration = Mockito.spy(getRawConfiguration()); - configuration.set(FS_AZURE_MAX_CONSUMER_LAG, "10"); + configuration.set(FS_AZURE_PRODUCER_QUEUE_MAX_SIZE, "10"); AzureBlobFileSystem fs = 
(AzureBlobFileSystem) FileSystem.newInstance( configuration); AbfsClient client = fs.getAbfsClient(); @@ -67,57 +74,67 @@ public void testProducerWaitingForConsumerLagToGoDown() throws Exception { fs.getAbfsStore().setClient(spiedClient); fs.setWorkingDirectory(new Path("/")); fs.mkdirs(new Path("/src")); + ExecutorService executor = Executors.newFixedThreadPool(5); + List futureList = new ArrayList<>(); for (int i = 0; i < 20; i++) { - fs.create(new Path("/src/file" + i)); + int iter = i; + futureList.add(executor.submit(() -> { + return fs.create(new Path("/src/file" + iter)); + })); + } + for(Future future : futureList) { + future.get(); } - AtomicBoolean produced = new AtomicBoolean(true); AtomicInteger producedBlobs = new AtomicInteger(0); AtomicInteger listBlobInvoked = new AtomicInteger(0); + final ITestListBlobProducer testObj = this; + final ListBlobQueue queue = new ListBlobQueue( + fs.getAbfsStore().getAbfsConfiguration().getProducerQueueMaxSize(), + 1); + Mockito.doAnswer(answer -> { - listBlobInvoked.incrementAndGet(); - AbfsRestOperation op = client.getListBlobs(answer.getArgument(0), - answer.getArgument(1), 1, answer.getArgument(3)); - producedBlobs.incrementAndGet(); - produced.set(true); - return op; + synchronized (testObj) { + listBlobInvoked.incrementAndGet(); + AbfsRestOperation op = client.getListBlobs(answer.getArgument(0), + answer.getArgument(1), 1, answer.getArgument(3)); + producedBlobs.incrementAndGet(); + if(producedBlobs.get() > 10) { + Assert.assertTrue(queue.availableSize() > 0); + } + return op; + } }) .when(spiedClient) .getListBlobs(Mockito.nullable(String.class), Mockito.nullable(String.class), Mockito.nullable(Integer.class), Mockito.nullable(TracingContext.class)); - ListBlobQueue queue = new ListBlobQueue(null); + ListBlobProducer producer = new ListBlobProducer("src/", spiedClient, queue, null, Mockito.mock( TracingContext.class)); ListBlobConsumer consumer = new ListBlobConsumer(queue); while (producedBlobs.get() < 10) ; - 
int producedBlobCount = producedBlobs.get(); - int oldInvocation = listBlobInvoked.get(); - Thread.sleep(10_000L); Assert.assertTrue(listBlobInvoked.get() == oldInvocation); while (!consumer.isCompleted()) { - produced.set(false); - consumer.consume(); - while (!produced.get() && !queue.getIsCompleted()) ; - if (!queue.getIsCompleted()) { - Assert.assertEquals(producedBlobs.get() - 1, producedBlobCount); + synchronized (testObj) { + consumer.consume(); + Assert.assertTrue(queue.availableSize() > 0); } - producedBlobCount = producedBlobs.get(); } - Assert.assertTrue(producedBlobCount == 20); + Assert.assertTrue(producedBlobs.get() == 20); } @Test public void testConsumerWhenProducerThrowException() throws Exception { Configuration configuration = Mockito.spy(getRawConfiguration()); - configuration.set(FS_AZURE_MAX_CONSUMER_LAG, "10"); + configuration.set(FS_AZURE_PRODUCER_QUEUE_MAX_SIZE, "10"); AzureBlobFileSystem fs = (AzureBlobFileSystem) FileSystem.newInstance( configuration); AbfsClient client = fs.getAbfsClient(); @@ -139,7 +156,8 @@ public void testConsumerWhenProducerThrowException() throws Exception { Mockito.nullable(String.class), Mockito.nullable(Integer.class), Mockito.nullable(TracingContext.class)); - ListBlobQueue queue = new ListBlobQueue(null); + ListBlobQueue queue = new ListBlobQueue(getConfiguration().getProducerQueueMaxSize(), + getConfiguration().getProducerQueueMaxSize()); ListBlobProducer producer = new ListBlobProducer("src/", spiedClient, queue, null, Mockito.mock( TracingContext.class)); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/constants/TestConfigurationKeys.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/constants/TestConfigurationKeys.java index 565eb38c4f70a7..9e40f22d231b05 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/constants/TestConfigurationKeys.java +++ 
b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/constants/TestConfigurationKeys.java @@ -24,6 +24,9 @@ public final class TestConfigurationKeys { public static final String FS_AZURE_ACCOUNT_NAME = "fs.azure.account.name"; public static final String FS_AZURE_ABFS_ACCOUNT_NAME = "fs.azure.abfs.account.name"; + public static final String FS_AZURE_ABFS_ACCOUNT1_NAME = "fs.azure.abfs.account1.name"; + public static final String FS_AZURE_ENABLE_AUTOTHROTTLING = "fs.azure.enable.autothrottling"; + public static final String FS_AZURE_ANALYSIS_PERIOD = "fs.azure.analysis.period"; public static final String FS_AZURE_ACCOUNT_KEY = "fs.azure.account.key"; public static final String FS_AZURE_CONTRACT_TEST_URI = "fs.contract.test.fs.abfs"; public static final String FS_AZURE_TEST_NAMESPACE_ENABLED_ACCOUNT = "fs.azure.test.namespace.enabled"; diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperationTestUtil.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperationTestUtil.java index d1c661eea3b780..56556b1930566d 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperationTestUtil.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperationTestUtil.java @@ -41,7 +41,7 @@ public static void addAbfsHttpOpProcessResponseMock(final AbfsRestOperation spie spiedRestOp.getMethod(), spiedRestOp.getRequestHeaders()); AbfsHttpOperation spiedOp = Mockito.spy(op); return functionRaisingIOE.apply(spiedOp, actualOp); - }).when(spiedRestOp).createNewHttpOperation(); + }).when(spiedRestOp).createHttpOperation(); } public static void setResult(final AbfsRestOperation op, final AbfsHttpOperation result) { diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsClient.java 
b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java similarity index 62% rename from hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsClient.java rename to hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java index 8dfef876561f73..e798a4baa36ab0 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsClient.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java @@ -20,21 +20,44 @@ import java.io.IOException; import java.lang.reflect.Field; +import java.net.HttpURLConnection; +import java.net.ProtocolException; import java.net.URL; import java.util.List; +import java.util.Random; import java.util.regex.Pattern; import org.junit.Ignore; +import org.assertj.core.api.Assertions; import org.junit.Test; +import org.mockito.Mockito; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; +import org.apache.hadoop.fs.azurebfs.AbstractAbfsIntegrationTest; +import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem; +import org.apache.hadoop.fs.azurebfs.TestAbfsConfigurationFieldsValidation; +import org.apache.hadoop.fs.azurebfs.constants.FSOperationType; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; +import org.apache.hadoop.fs.azurebfs.contracts.services.AppendRequestParameters; import org.apache.hadoop.fs.azurebfs.oauth2.AccessTokenProvider; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys; +import org.apache.hadoop.fs.azurebfs.utils.TracingContext; +import org.apache.hadoop.fs.azurebfs.utils.TracingHeaderFormat; import org.apache.hadoop.security.ssl.DelegatingSSLSocketFactory; -import static org.assertj.core.api.Assertions.assertThat; +import static java.net.HttpURLConnection.HTTP_NOT_FOUND; +import static 
org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.APPEND_ACTION; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_PATCH; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_PUT; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HUNDRED_CONTINUE; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.EXPECT; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_HTTP_METHOD_OVERRIDE; +import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_ACTION; +import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_POSITION; +import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.FS_AZURE_ABFS_ACCOUNT_NAME; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -60,14 +83,19 @@ * Test useragent of abfs client. 
* */ -public final class TestAbfsClient { +public final class ITestAbfsClient extends AbstractAbfsIntegrationTest { private static final String ACCOUNT_NAME = "bogusAccountName.dfs.core.windows.net"; private static final String FS_AZURE_USER_AGENT_PREFIX = "Partner Service"; + private static final String TEST_PATH = "/testfile"; + public static final int REDUCED_RETRY_COUNT = 2; + public static final int REDUCED_BACKOFF_INTERVAL = 100; + public static final int BUFFER_LENGTH = 5; + public static final int BUFFER_OFFSET = 0; private final Pattern userAgentStringPattern; - public TestAbfsClient(){ + public ITestAbfsClient() throws Exception { StringBuilder regEx = new StringBuilder(); regEx.append("^"); regEx.append(APN_VERSION); @@ -125,7 +153,7 @@ public void verifybBasicInfo() throws Exception { } private void verifybBasicInfo(String userAgentStr) { - assertThat(userAgentStr) + Assertions.assertThat(userAgentStr) .describedAs("User-Agent string [" + userAgentStr + "] should be of the pattern: " + this.userAgentStringPattern.pattern()) .matches(this.userAgentStringPattern) @@ -155,7 +183,7 @@ public void verifyUserAgentPrefix() String userAgentStr = getUserAgentString(abfsConfiguration, false); verifybBasicInfo(userAgentStr); - assertThat(userAgentStr) + Assertions.assertThat(userAgentStr) .describedAs("User-Agent string should contain " + FS_AZURE_USER_AGENT_PREFIX) .contains(FS_AZURE_USER_AGENT_PREFIX); @@ -165,7 +193,7 @@ public void verifyUserAgentPrefix() userAgentStr = getUserAgentString(abfsConfiguration, false); verifybBasicInfo(userAgentStr); - assertThat(userAgentStr) + Assertions.assertThat(userAgentStr) .describedAs("User-Agent string should not contain " + FS_AZURE_USER_AGENT_PREFIX) .doesNotContain(FS_AZURE_USER_AGENT_PREFIX); } @@ -181,14 +209,14 @@ public void verifyUserAgentWithoutSSLProvider() throws Exception { String userAgentStr = getUserAgentString(abfsConfiguration, true); verifybBasicInfo(userAgentStr); - assertThat(userAgentStr) + 
Assertions.assertThat(userAgentStr) .describedAs("User-Agent string should contain sslProvider") .contains(DelegatingSSLSocketFactory.getDefaultFactory().getProviderName()); userAgentStr = getUserAgentString(abfsConfiguration, false); verifybBasicInfo(userAgentStr); - assertThat(userAgentStr) + Assertions.assertThat(userAgentStr) .describedAs("User-Agent string should not contain sslProvider") .doesNotContain(DelegatingSSLSocketFactory.getDefaultFactory().getProviderName()); } @@ -204,7 +232,7 @@ public void verifyUserAgentClusterName() throws Exception { String userAgentStr = getUserAgentString(abfsConfiguration, false); verifybBasicInfo(userAgentStr); - assertThat(userAgentStr) + Assertions.assertThat(userAgentStr) .describedAs("User-Agent string should contain cluster name") .contains(clusterName); @@ -214,7 +242,7 @@ public void verifyUserAgentClusterName() throws Exception { userAgentStr = getUserAgentString(abfsConfiguration, false); verifybBasicInfo(userAgentStr); - assertThat(userAgentStr) + Assertions.assertThat(userAgentStr) .describedAs("User-Agent string should not contain cluster name") .doesNotContain(clusterName) .describedAs("User-Agent string should contain UNKNOWN as cluster name config is absent") @@ -232,7 +260,7 @@ public void verifyUserAgentClusterType() throws Exception { String userAgentStr = getUserAgentString(abfsConfiguration, false); verifybBasicInfo(userAgentStr); - assertThat(userAgentStr) + Assertions.assertThat(userAgentStr) .describedAs("User-Agent string should contain cluster type") .contains(clusterType); @@ -242,7 +270,7 @@ public void verifyUserAgentClusterType() throws Exception { userAgentStr = getUserAgentString(abfsConfiguration, false); verifybBasicInfo(userAgentStr); - assertThat(userAgentStr) + Assertions.assertThat(userAgentStr) .describedAs("User-Agent string should not contain cluster type") .doesNotContain(clusterType) .describedAs("User-Agent string should contain UNKNOWN as cluster type config is absent") @@ 
-308,24 +336,28 @@ public static AbfsClient getMockAbfsClient(AbfsClient baseAbfsClientInstance, when(client.getAccessToken()).thenCallRealMethod(); when(client.getSharedKeyCredentials()).thenCallRealMethod(); when(client.createDefaultHeaders()).thenCallRealMethod(); - + when(client.getAbfsConfiguration()).thenReturn(abfsConfig); + when(client.getIntercept()).thenReturn( + AbfsThrottlingInterceptFactory.getInstance( + abfsConfig.getAccountName().substring(0, + abfsConfig.getAccountName().indexOf(DOT)), abfsConfig)); // override baseurl - client = TestAbfsClient.setAbfsClientField(client, "abfsConfiguration", + client = ITestAbfsClient.setAbfsClientField(client, "abfsConfiguration", abfsConfig); // override baseurl - client = TestAbfsClient.setAbfsClientField(client, "baseUrl", + client = ITestAbfsClient.setAbfsClientField(client, "baseUrl", baseAbfsClientInstance.getBaseUrl()); // override auth provider if (currentAuthType == AuthType.SharedKey) { - client = TestAbfsClient.setAbfsClientField(client, "sharedKeyCredentials", + client = ITestAbfsClient.setAbfsClientField(client, "sharedKeyCredentials", new SharedKeyCredentials( abfsConfig.getAccountName().substring(0, abfsConfig.getAccountName().indexOf(DOT)), abfsConfig.getStorageAccountKey())); } else { - client = TestAbfsClient.setAbfsClientField(client, "tokenProvider", + client = ITestAbfsClient.setAbfsClientField(client, "tokenProvider", abfsConfig.getTokenProvider()); } @@ -333,7 +365,7 @@ public static AbfsClient getMockAbfsClient(AbfsClient baseAbfsClientInstance, String userAgent = "APN/1.0 Azure Blob FS/3.4.0-SNAPSHOT (PrivateBuild " + "JavaJRE 1.8.0_252; Linux 5.3.0-59-generic/amd64; openssl-1.0; " + "UNKNOWN/UNKNOWN) MSFT"; - client = TestAbfsClient.setAbfsClientField(client, "userAgent", userAgent); + client = ITestAbfsClient.setAbfsClientField(client, "userAgent", userAgent); return client; } @@ -397,4 +429,160 @@ public static AbfsRestOperation getRestOp(AbfsRestOperationType type, url, 
requestHeaders); } + + public static AccessTokenProvider getAccessTokenProvider(AbfsClient client) { + return client.getTokenProvider(); + } + + /** + * Test helper method to get random bytes array. + * @param length The length of byte buffer. + * @return byte buffer. + */ + private byte[] getRandomBytesArray(int length) { + final byte[] b = new byte[length]; + new Random().nextBytes(b); + return b; + } + + /** + * Test to verify that client retries append request without + * expect header enabled if append with expect header enabled fails + * with 4xx kind of error. + * @throws Exception + */ + @Test + public void testExpectHundredContinue() throws Exception { + // Get the filesystem. + final AzureBlobFileSystem fs = getFileSystem(); + + final Configuration configuration = new Configuration(); + configuration.addResource(TEST_CONFIGURATION_FILE_NAME); + AbfsClient abfsClient = getClient(fs); + + AbfsConfiguration abfsConfiguration = new AbfsConfiguration(configuration, + configuration.get(FS_AZURE_ABFS_ACCOUNT_NAME)); + + // Update the configuration with reduced retry count and reduced backoff interval. + AbfsConfiguration abfsConfig + = TestAbfsConfigurationFieldsValidation.updateRetryConfigs( + abfsConfiguration, + REDUCED_RETRY_COUNT, REDUCED_BACKOFF_INTERVAL); + + // Gets the client. + AbfsClient testClient = Mockito.spy( + ITestAbfsClient.createTestClientFromCurrentContext( + abfsClient, + abfsConfig)); + + // Create the append request params with expect header enabled initially. + AppendRequestParameters appendRequestParameters + = new AppendRequestParameters( + BUFFER_OFFSET, BUFFER_OFFSET, BUFFER_LENGTH, + AppendRequestParameters.Mode.APPEND_MODE, false, null, true); + + byte[] buffer = getRandomBytesArray(BUFFER_LENGTH); + + // Create a test container to upload the data. 
+ Path testPath = path(TEST_PATH); + fs.create(testPath); + String finalTestPath = testPath.toString() + .substring(testPath.toString().lastIndexOf("/")); + + // Creates a list of request headers. + final List requestHeaders + = ITestAbfsClient.getTestRequestHeaders(testClient); + requestHeaders.add( + new AbfsHttpHeader(X_HTTP_METHOD_OVERRIDE, HTTP_METHOD_PATCH)); + if (appendRequestParameters.isExpectHeaderEnabled()) { + requestHeaders.add(new AbfsHttpHeader(EXPECT, HUNDRED_CONTINUE)); + } + + // Updates the query parameters. + final AbfsUriQueryBuilder abfsUriQueryBuilder + = testClient.createDefaultUriQueryBuilder(); + abfsUriQueryBuilder.addQuery(QUERY_PARAM_ACTION, APPEND_ACTION); + abfsUriQueryBuilder.addQuery(QUERY_PARAM_POSITION, + Long.toString(appendRequestParameters.getPosition())); + + // Creates the url for the specified path. + URL url = testClient.createRequestUrl(finalTestPath, abfsUriQueryBuilder.toString()); + + // Create a spy of the AbfsRestOperation to set the urlConnection in the corresponding httpOperation. + AbfsRestOperation op = Mockito.spy(new AbfsRestOperation( + AbfsRestOperationType.Append, + testClient, + HTTP_METHOD_PUT, + url, + requestHeaders, buffer, + appendRequestParameters.getoffset(), + appendRequestParameters.getLength(), null)); + + AbfsHttpOperation abfsHttpOperation = Mockito.spy(new AbfsHttpOperation(url, + HTTP_METHOD_PUT, requestHeaders)); + + // Sets the expect request property if expect header is enabled.
+ if (appendRequestParameters.isExpectHeaderEnabled()) { + Mockito.doReturn(HUNDRED_CONTINUE).when(abfsHttpOperation) + .getConnProperty(EXPECT); + } + + HttpURLConnection urlConnection = mock(HttpURLConnection.class); + Mockito.doNothing().when(urlConnection).setRequestProperty(Mockito + .any(), Mockito.any()); + Mockito.doReturn(HTTP_METHOD_PUT).when(urlConnection).getRequestMethod(); + Mockito.doReturn(url).when(urlConnection).getURL(); + Mockito.doReturn(urlConnection).when(abfsHttpOperation).getConnection(); + + Mockito.doNothing().when(abfsHttpOperation).setRequestProperty(Mockito + .any(), Mockito.any()); + Mockito.doReturn(url).when(abfsHttpOperation).getConnUrl(); + + // Give user error code 404 when processResponse is called. + Mockito.doReturn(HTTP_METHOD_PUT).when(abfsHttpOperation).getConnRequestMethod(); + Mockito.doReturn(HTTP_NOT_FOUND).when(abfsHttpOperation).getConnResponseCode(); + Mockito.doReturn("Resource Not Found") + .when(abfsHttpOperation) + .getConnResponseMessage(); + + // Make the getOutputStream throw IOException to see it returns from the sendRequest correctly. + Mockito.doThrow(new ProtocolException("Server rejected Operation")) + .when(abfsHttpOperation) + .getConnOutputStream(); + + // Sets the httpOperation for the rest operation. + Mockito.doReturn(abfsHttpOperation) + .when(op) + .createHttpOperation(); + + // Mock the restOperation for the client. + Mockito.doReturn(op) + .when(testClient) + .getAbfsRestOperationForAppend(Mockito.any(), + Mockito.any(), Mockito.any(), Mockito.any(), Mockito.any(), + Mockito.nullable(int.class), Mockito.nullable(int.class), + Mockito.any()); + + TracingContext tracingContext = Mockito.spy(new TracingContext("abcd", + "abcde", FSOperationType.APPEND, + TracingHeaderFormat.ALL_ID_FORMAT, null)); + + // Check that expect header is enabled before the append call. 
+ Assertions.assertThat(appendRequestParameters.isExpectHeaderEnabled()) + .describedAs("The expect header is not true before the append call") + .isTrue(); + + intercept(AzureBlobFileSystemException.class, + () -> testClient.append(finalTestPath, buffer, appendRequestParameters, null, tracingContext)); + + // Verify that the request was not exponentially retried because of user error. + Assertions.assertThat(tracingContext.getRetryCount()) + .describedAs("The retry count is incorrect") + .isEqualTo(0); + + // Verify that the same request was retried with expect header disabled. + Assertions.assertThat(appendRequestParameters.isExpectHeaderEnabled()) + .describedAs("The expect header is not false") + .isFalse(); + } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsRestOperation.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsRestOperation.java new file mode 100644 index 00000000000000..fe3c2a9892c4c6 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsRestOperation.java @@ -0,0 +1,358 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.io.IOException; +import java.io.OutputStream; +import java.net.HttpURLConnection; +import java.net.ProtocolException; +import java.net.URL; +import java.util.Arrays; +import java.util.List; +import java.util.Random; + +import org.assertj.core.api.Assertions; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.mockito.Mockito; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; +import org.apache.hadoop.fs.azurebfs.AbstractAbfsIntegrationTest; +import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem; +import org.apache.hadoop.fs.azurebfs.TestAbfsConfigurationFieldsValidation; +import org.apache.hadoop.fs.azurebfs.constants.FSOperationType; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; +import org.apache.hadoop.fs.azurebfs.contracts.services.AppendRequestParameters; +import org.apache.hadoop.fs.azurebfs.utils.TracingContext; +import org.apache.hadoop.fs.azurebfs.utils.TracingHeaderFormat; + +import static java.net.HttpURLConnection.HTTP_NOT_FOUND; +import static java.net.HttpURLConnection.HTTP_OK; +import static java.net.HttpURLConnection.HTTP_UNAVAILABLE; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.APPEND_ACTION; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_PATCH; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_PUT; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HUNDRED_CONTINUE; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.EXPECT; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_HTTP_METHOD_OVERRIDE; +import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_ACTION; +import static 
org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_POSITION; +import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.FS_AZURE_ABFS_ACCOUNT_NAME; +import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.TEST_CONFIGURATION_FILE_NAME; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.times; + +@RunWith(Parameterized.class) +public class ITestAbfsRestOperation extends AbstractAbfsIntegrationTest { + + // Specifies whether getOutputStream() or write() throws IOException. + public enum ErrorType {OUTPUTSTREAM, WRITE}; + + private static final int HTTP_EXPECTATION_FAILED = 417; + private static final int HTTP_ERROR = 0; + private static final int ZERO = 0; + private static final int REDUCED_RETRY_COUNT = 2; + private static final int REDUCED_BACKOFF_INTERVAL = 100; + private static final int BUFFER_LENGTH = 5; + private static final int BUFFER_OFFSET = 0; + private static final String TEST_PATH = "/testfile"; + + // Specifies whether the expect header is enabled or not. + @Parameterized.Parameter + public boolean expectHeaderEnabled; + + // Gives the http response code. + @Parameterized.Parameter(1) + public int responseCode; + + // Gives the http response message. + @Parameterized.Parameter(2) + public String responseMessage; + + // Gives the errorType based on the enum. + @Parameterized.Parameter(3) + public ErrorType errorType; + + // The intercept. + private AbfsThrottlingIntercept intercept; + + /* + HTTP_OK = 200, + HTTP_UNAVAILABLE = 503, + HTTP_NOT_FOUND = 404, + HTTP_EXPECTATION_FAILED = 417, + HTTP_ERROR = 0. 
+ */ + @Parameterized.Parameters(name = "expect={0}-code={1}-ErrorType={3}") + public static Iterable params() { + return Arrays.asList(new Object[][]{ + {true, HTTP_OK, "OK", ErrorType.WRITE}, + {false, HTTP_OK, "OK", ErrorType.WRITE}, + {true, HTTP_UNAVAILABLE, "ServerBusy", ErrorType.OUTPUTSTREAM}, + {true, HTTP_NOT_FOUND, "Resource Not Found", ErrorType.OUTPUTSTREAM}, + {true, HTTP_EXPECTATION_FAILED, "Expectation Failed", ErrorType.OUTPUTSTREAM}, + {true, HTTP_ERROR, "Error", ErrorType.OUTPUTSTREAM} + }); + } + + public ITestAbfsRestOperation() throws Exception { + super(); + } + + /** + * Test helper method to get random bytes array. + * @param length The length of byte buffer + * @return byte buffer + */ + private byte[] getRandomBytesArray(int length) { + final byte[] b = new byte[length]; + new Random().nextBytes(b); + return b; + } + + /** + * Gives the AbfsRestOperation. + * @return abfsRestOperation. + */ + private AbfsRestOperation getRestOperation() throws Exception { + // Get the filesystem. + final AzureBlobFileSystem fs = getFileSystem(); + + final Configuration configuration = new Configuration(); + configuration.addResource(TEST_CONFIGURATION_FILE_NAME); + AbfsClient abfsClient = getClient(fs); + + AbfsConfiguration abfsConfiguration = new AbfsConfiguration(configuration, + configuration.get(FS_AZURE_ABFS_ACCOUNT_NAME)); + + // Update the configuration with reduced retry count and reduced backoff interval. + AbfsConfiguration abfsConfig + = TestAbfsConfigurationFieldsValidation.updateRetryConfigs( + abfsConfiguration, + REDUCED_RETRY_COUNT, REDUCED_BACKOFF_INTERVAL); + + intercept = Mockito.mock(AbfsThrottlingIntercept.class); + Mockito.doNothing().when(intercept).updateMetrics(Mockito.any(), Mockito.any()); + + // Gets the client. 
+ AbfsClient testClient = Mockito.spy(ITestAbfsClient.createTestClientFromCurrentContext( + abfsClient, + abfsConfig)); + + Mockito.doReturn(intercept).when(testClient).getIntercept(); + + // Expect header is enabled or not based on the parameter. + AppendRequestParameters appendRequestParameters + = new AppendRequestParameters( + BUFFER_OFFSET, BUFFER_OFFSET, BUFFER_LENGTH, + AppendRequestParameters.Mode.APPEND_MODE, false, null, + expectHeaderEnabled); + + byte[] buffer = getRandomBytesArray(5); + + // Create a test file to upload the data to. + Path testPath = path(TEST_PATH); + fs.create(testPath); + String finalTestPath = testPath.toString().substring(testPath.toString().lastIndexOf("/")); + + // Creates a list of request headers. + final List requestHeaders = ITestAbfsClient.getTestRequestHeaders(testClient); + requestHeaders.add(new AbfsHttpHeader(X_HTTP_METHOD_OVERRIDE, HTTP_METHOD_PATCH)); + if (appendRequestParameters.isExpectHeaderEnabled()) { + requestHeaders.add(new AbfsHttpHeader(EXPECT, HUNDRED_CONTINUE)); + } + + // Updates the query parameters. + final AbfsUriQueryBuilder abfsUriQueryBuilder = testClient.createDefaultUriQueryBuilder(); + abfsUriQueryBuilder.addQuery(QUERY_PARAM_ACTION, APPEND_ACTION); + abfsUriQueryBuilder.addQuery(QUERY_PARAM_POSITION, Long.toString(appendRequestParameters.getPosition())); + + // Creates the url for the specified path. + URL url = testClient.createRequestUrl(finalTestPath, abfsUriQueryBuilder.toString()); + + // Create a spy of the AbfsRestOperation to set the urlConnection in the corresponding httpOperation.
+ AbfsRestOperation op = Mockito.spy(new AbfsRestOperation( + AbfsRestOperationType.Append, + testClient, + HTTP_METHOD_PUT, + url, + requestHeaders, buffer, + appendRequestParameters.getoffset(), + appendRequestParameters.getLength(), null)); + + AbfsHttpOperation abfsHttpOperation = Mockito.spy(new AbfsHttpOperation(url, HTTP_METHOD_PUT, requestHeaders)); + + // Sets the expect request property if expect header is enabled. + if (expectHeaderEnabled) { + Mockito.doReturn(HUNDRED_CONTINUE) + .when(abfsHttpOperation) + .getConnProperty(EXPECT); + } + + HttpURLConnection urlConnection = mock(HttpURLConnection.class); + Mockito.doNothing().when(urlConnection).setRequestProperty(Mockito + .any(), Mockito.any()); + Mockito.doReturn(HTTP_METHOD_PUT).when(urlConnection).getRequestMethod(); + Mockito.doReturn(url).when(urlConnection).getURL(); + Mockito.doReturn(urlConnection).when(abfsHttpOperation).getConnection(); + + Mockito.doNothing().when(abfsHttpOperation).setRequestProperty(Mockito + .any(), Mockito.any()); + Mockito.doReturn(url).when(abfsHttpOperation).getConnUrl(); + Mockito.doReturn(HTTP_METHOD_PUT).when(abfsHttpOperation).getConnRequestMethod(); + + switch (errorType) { + case OUTPUTSTREAM: + // If the getOutputStream() throws IOException and Expect Header is + // enabled, it returns back to processResponse and hence we have + // mocked the response code and the response message to check different + // behaviour based on response code. + Mockito.doReturn(responseCode).when(abfsHttpOperation).getConnResponseCode(); + Mockito.doReturn(responseMessage) + .when(abfsHttpOperation) + .getConnResponseMessage(); + Mockito.doThrow(new ProtocolException("Server rejected Operation")) + .when(abfsHttpOperation) + .getConnOutputStream(); + break; + case WRITE: + // If write() throws IOException and Expect Header is + // enabled or not, it should throw back the exception. 
+ OutputStream outputStream = Mockito.spy(new OutputStream() { + @Override + public void write(final int i) throws IOException { + } + }); + Mockito.doReturn(outputStream).when(abfsHttpOperation).getConnOutputStream(); + Mockito.doThrow(new IOException()) + .when(outputStream) + .write(buffer, appendRequestParameters.getoffset(), + appendRequestParameters.getLength()); + break; + default: + break; + } + + // Sets the httpOperation for the rest operation. + Mockito.doReturn(abfsHttpOperation) + .when(op) + .createHttpOperation(); + return op; + } + + void assertTraceContextState(int retryCount, int assertRetryCount, int bytesSent, int assertBytesSent, + int expectedBytesSent, int assertExpectedBytesSent) { + // Assert that the request is retried or not. + Assertions.assertThat(retryCount) + .describedAs("The retry count is incorrect") + .isEqualTo(assertRetryCount); + + // Assert that metrics will be updated correctly. + Assertions.assertThat(bytesSent) + .describedAs("The bytes sent is incorrect") + .isEqualTo(assertBytesSent); + Assertions.assertThat(expectedBytesSent) + .describedAs("The expected bytes sent is incorrect") + .isEqualTo(assertExpectedBytesSent); + } + + /** + * Test the functionalities based on whether getOutputStream() or write() + * throws exception and what is the corresponding response code. + */ + @Test + public void testExpectHundredContinue() throws Exception { + // Gets the AbfsRestOperation. + AbfsRestOperation op = getRestOperation(); + AbfsHttpOperation httpOperation = op.createHttpOperation(); + + TracingContext tracingContext = Mockito.spy(new TracingContext("abcd", + "abcde", FSOperationType.APPEND, + TracingHeaderFormat.ALL_ID_FORMAT, null)); + + switch (errorType) { + case WRITE: + // If write() throws IOException and Expect Header is + // enabled or not, it should throw back the exception + // which is caught and exponential retry logic comes into place. 
+ intercept(IOException.class, + () -> op.execute(tracingContext)); + + // Asserting update of metrics and retries. + assertTraceContextState(tracingContext.getRetryCount(), REDUCED_RETRY_COUNT, httpOperation.getBytesSent(), BUFFER_LENGTH, + 0, 0); + break; + case OUTPUTSTREAM: + switch (responseCode) { + case HTTP_UNAVAILABLE: + // In the case of 503 i.e. throttled case, we should retry. + intercept(IOException.class, + () -> op.execute(tracingContext)); + + // Asserting update of metrics and retries. + assertTraceContextState(tracingContext.getRetryCount(), REDUCED_RETRY_COUNT, httpOperation.getBytesSent(), ZERO, + httpOperation.getExpectedBytesToBeSent(), BUFFER_LENGTH); + + // Verifies that update Metrics call is made for throttle case and for the first without retry + + // for the retried cases as well. + Mockito.verify(intercept, times(REDUCED_RETRY_COUNT + 1)) + .updateMetrics(Mockito.any(), Mockito.any()); + break; + case HTTP_ERROR: + // In the case of http status code 0 i.e. error case, we should retry. + intercept(IOException.class, + () -> op.execute(tracingContext)); + + // Asserting update of metrics and retries. + assertTraceContextState(tracingContext.getRetryCount(), REDUCED_RETRY_COUNT, httpOperation.getBytesSent(), + ZERO, 0, 0); + + // Verifies that update Metrics call is made for error case and for the first without retry + + // for the retried cases as well. + Mockito.verify(intercept, times(REDUCED_RETRY_COUNT + 1)) + .updateMetrics(Mockito.any(), Mockito.any()); + break; + case HTTP_NOT_FOUND: + case HTTP_EXPECTATION_FAILED: + // In the case of 4xx errors i.e. user errors, retry should not happen. + intercept(AzureBlobFileSystemException.class, + () -> op.execute(tracingContext)); + + // Asserting update of metrics and retries. + assertTraceContextState(tracingContext.getRetryCount(), ZERO, 0, + 0, 0, 0); + + // Verifies that update Metrics call is not made for user error case.
+ Mockito.verify(intercept, never()) + .updateMetrics(Mockito.any(), Mockito.any()); + break; + default: + break; + } + break; + default: + break; + } + } +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestBlobOperation.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestBlobOperation.java index 5530fb0e1b9cd9..a0b116ab0593a2 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestBlobOperation.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestBlobOperation.java @@ -92,7 +92,7 @@ public void testPutBlob() throws Exception { configuration.get(FS_AZURE_ABFS_ACCOUNT_NAME)); // Gets the client. - AbfsClient testClient = Mockito.spy(TestAbfsClient.createTestClientFromCurrentContext( + AbfsClient testClient = Mockito.spy(ITestAbfsClient.createTestClientFromCurrentContext( abfsClient, abfsConfiguration)); @@ -103,7 +103,7 @@ public void testPutBlob() throws Exception { String finalTestPath = testPath.toString().substring(testPath.toString().lastIndexOf("/")); // Creates a list of request headers. 
- final List requestHeaders = TestAbfsClient.getTestRequestHeaders(testClient); + final List requestHeaders = ITestAbfsClient.getTestRequestHeaders(testClient); requestHeaders.add(new AbfsHttpHeader(CONTENT_LENGTH, String.valueOf(buffer.length))); requestHeaders.add(new AbfsHttpHeader(X_MS_BLOB_TYPE, BLOCK_BLOB_TYPE)); String ContentMD5 = computeMd5(buffer); @@ -160,7 +160,7 @@ public void testPutBlockWithNullData() throws Exception { AbfsConfiguration abfsConfiguration = new AbfsConfiguration(configuration, configuration.get(FS_AZURE_ABFS_ACCOUNT_NAME)); - AbfsClient testClient = Mockito.spy(TestAbfsClient.createTestClientFromCurrentContext( + AbfsClient testClient = Mockito.spy(ITestAbfsClient.createTestClientFromCurrentContext( abfsClient, abfsConfiguration)); @@ -168,7 +168,7 @@ public void testPutBlockWithNullData() throws Exception { byte[] data = null; Path testPath = path(TEST_PATH); String finalTestPath = testPath.toString().substring(testPath.toString().lastIndexOf("/")); - final List requestHeaders = TestAbfsClient.getTestRequestHeaders(testClient); + final List requestHeaders = ITestAbfsClient.getTestRequestHeaders(testClient); final AbfsUriQueryBuilder abfsUriQueryBuilder = testClient.createDefaultUriQueryBuilder(); abfsUriQueryBuilder.addQuery(QUERY_PARAM_COMP, BLOCK); @@ -211,13 +211,13 @@ public void testPutBlockWithDifferentLengthBlockIds() throws Exception { "world".getBytes(), "!".getBytes() )); - AbfsClient testClient = Mockito.spy(TestAbfsClient.createTestClientFromCurrentContext( + AbfsClient testClient = Mockito.spy(ITestAbfsClient.createTestClientFromCurrentContext( abfsClient, abfsConfiguration)); Path testPath = path(TEST_PATH); String finalTestPath = testPath.toString().substring(testPath.toString().lastIndexOf("/")); - final List requestHeaders = TestAbfsClient.getTestRequestHeaders(testClient); + final List requestHeaders = ITestAbfsClient.getTestRequestHeaders(testClient); final AbfsUriQueryBuilder abfsUriQueryBuilder = 
testClient.createDefaultUriQueryBuilder(); abfsUriQueryBuilder.addQuery(QUERY_PARAM_COMP, BLOCK); @@ -273,12 +273,12 @@ public void testGetCommittedBlockList() throws IOException, IllegalAccessExcepti "world".getBytes(), "!".getBytes() )); - AbfsClient testClient = Mockito.spy(TestAbfsClient.createTestClientFromCurrentContext( + AbfsClient testClient = Mockito.spy(ITestAbfsClient.createTestClientFromCurrentContext( abfsClient, abfsConfiguration)); Path testPath = path(TEST_PATH); String finalTestPath = testPath.toString().substring(testPath.toString().lastIndexOf("/")); - final List requestHeaders = TestAbfsClient.getTestRequestHeaders(testClient); + final List requestHeaders = ITestAbfsClient.getTestRequestHeaders(testClient); final AbfsUriQueryBuilder abfsUriQueryBuilder = testClient.createDefaultUriQueryBuilder(); abfsUriQueryBuilder.addQuery(QUERY_PARAM_COMP, BLOCK); List encodedBlockIds = new ArrayList<>(); @@ -306,7 +306,7 @@ public void testGetCommittedBlockList() throws IOException, IllegalAccessExcepti } byte[] bufferString = generateBlockListXml(blockIds).getBytes(StandardCharsets.UTF_8); final AbfsUriQueryBuilder abfsUriQueryBuilder1 = testClient.createDefaultUriQueryBuilder(); - final List requestHeaders1 = TestAbfsClient.getTestRequestHeaders(testClient); + final List requestHeaders1 = ITestAbfsClient.getTestRequestHeaders(testClient); abfsUriQueryBuilder1.addQuery(QUERY_PARAM_COMP, BLOCKLIST); requestHeaders1.add(new AbfsHttpHeader(CONTENT_LENGTH, String.valueOf(bufferString.length))); requestHeaders1.add(new AbfsHttpHeader(CONTENT_TYPE, "application/xml")); @@ -351,12 +351,12 @@ public void testPutBlockListForAdditionalBlockId() throws Exception { "world".getBytes(), "!".getBytes() )); - AbfsClient testClient = Mockito.spy(TestAbfsClient.createTestClientFromCurrentContext( + AbfsClient testClient = Mockito.spy(ITestAbfsClient.createTestClientFromCurrentContext( abfsClient, abfsConfiguration)); Path testPath = path(TEST_PATH); String finalTestPath = 
testPath.toString().substring(testPath.toString().lastIndexOf("/")); - final List requestHeaders = TestAbfsClient.getTestRequestHeaders(testClient); + final List requestHeaders = ITestAbfsClient.getTestRequestHeaders(testClient); final AbfsUriQueryBuilder abfsUriQueryBuilder = testClient.createDefaultUriQueryBuilder(); abfsUriQueryBuilder.addQuery(QUERY_PARAM_COMP, BLOCK); @@ -383,7 +383,7 @@ public void testPutBlockListForAdditionalBlockId() throws Exception { } byte[] bufferString = generateBlockListXml(blockIds).getBytes(StandardCharsets.UTF_8); final AbfsUriQueryBuilder abfsUriQueryBuilder1 = testClient.createDefaultUriQueryBuilder(); - final List requestHeaders1 = TestAbfsClient.getTestRequestHeaders(testClient); + final List requestHeaders1 = ITestAbfsClient.getTestRequestHeaders(testClient); abfsUriQueryBuilder1.addQuery(QUERY_PARAM_COMP, BLOCKLIST); requestHeaders1.add(new AbfsHttpHeader(CONTENT_LENGTH, String.valueOf(bufferString.length))); requestHeaders1.add(new AbfsHttpHeader(CONTENT_TYPE, "application/xml")); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsClientThrottlingAnalyzer.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsClientThrottlingAnalyzer.java index 3f680e499300dd..bf3bd203a62ed9 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsClientThrottlingAnalyzer.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsClientThrottlingAnalyzer.java @@ -18,9 +18,15 @@ package org.apache.hadoop.fs.azurebfs.services; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; import org.apache.hadoop.fs.contract.ContractTestUtils; import org.junit.Test; +import java.io.IOException; + +import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.FS_AZURE_ANALYSIS_PERIOD; +import static 
org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.TEST_CONFIGURATION_FILE_NAME; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; @@ -33,6 +39,15 @@ public class TestAbfsClientThrottlingAnalyzer { + ANALYSIS_PERIOD / 10; private static final long MEGABYTE = 1024 * 1024; private static final int MAX_ACCEPTABLE_PERCENT_DIFFERENCE = 20; + private AbfsConfiguration abfsConfiguration; + + public TestAbfsClientThrottlingAnalyzer() throws IOException, IllegalAccessException { + final Configuration configuration = new Configuration(); + configuration.addResource(TEST_CONFIGURATION_FILE_NAME); + configuration.setInt(FS_AZURE_ANALYSIS_PERIOD, 1000); + this.abfsConfiguration = new AbfsConfiguration(configuration, + "dummy"); + } private void sleep(long milliseconds) { try { @@ -82,8 +97,7 @@ private void validateLessThanOrEqual(long maxExpected, long actual) { @Test public void testNoMetricUpdatesThenNoWaiting() { AbfsClientThrottlingAnalyzer analyzer = new AbfsClientThrottlingAnalyzer( - "test", - ANALYSIS_PERIOD); + "test", abfsConfiguration); validate(0, analyzer.getSleepDuration()); sleep(ANALYSIS_PERIOD_PLUS_10_PERCENT); validate(0, analyzer.getSleepDuration()); @@ -96,8 +110,7 @@ public void testNoMetricUpdatesThenNoWaiting() { @Test public void testOnlySuccessThenNoWaiting() { AbfsClientThrottlingAnalyzer analyzer = new AbfsClientThrottlingAnalyzer( - "test", - ANALYSIS_PERIOD); + "test", abfsConfiguration); analyzer.addBytesTransferred(8 * MEGABYTE, false); validate(0, analyzer.getSleepDuration()); sleep(ANALYSIS_PERIOD_PLUS_10_PERCENT); @@ -112,8 +125,7 @@ public void testOnlySuccessThenNoWaiting() { @Test public void testOnlyErrorsAndWaiting() { AbfsClientThrottlingAnalyzer analyzer = new AbfsClientThrottlingAnalyzer( - "test", - ANALYSIS_PERIOD); + "test", abfsConfiguration); validate(0, analyzer.getSleepDuration()); analyzer.addBytesTransferred(4 * MEGABYTE, true); sleep(ANALYSIS_PERIOD_PLUS_10_PERCENT); @@ -132,8 
+144,7 @@ public void testOnlyErrorsAndWaiting() { @Test public void testSuccessAndErrorsAndWaiting() { AbfsClientThrottlingAnalyzer analyzer = new AbfsClientThrottlingAnalyzer( - "test", - ANALYSIS_PERIOD); + "test", abfsConfiguration); validate(0, analyzer.getSleepDuration()); analyzer.addBytesTransferred(8 * MEGABYTE, false); analyzer.addBytesTransferred(2 * MEGABYTE, true); @@ -157,8 +168,7 @@ public void testSuccessAndErrorsAndWaiting() { @Test public void testManySuccessAndErrorsAndWaiting() { AbfsClientThrottlingAnalyzer analyzer = new AbfsClientThrottlingAnalyzer( - "test", - ANALYSIS_PERIOD); + "test", abfsConfiguration); validate(0, analyzer.getSleepDuration()); final int numberOfRequests = 20; for (int i = 0; i < numberOfRequests; i++) { diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsOutputStream.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsOutputStream.java index 7d1a44b038642d..5c4f24362381b6 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsOutputStream.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsOutputStream.java @@ -72,6 +72,7 @@ private AbfsOutputStreamContext populateAbfsOutputStreamContext( boolean isFlushEnabled, boolean disableOutputStreamFlush, boolean isAppendBlob, + boolean isExpectHeaderEnabled, AbfsClient client, String path, TracingContext tracingContext, @@ -89,6 +90,7 @@ private AbfsOutputStreamContext populateAbfsOutputStreamContext( return new AbfsOutputStreamContext(2) .withWriteBufferSize(writeBufferSize) + .enableExpectHeader(isExpectHeaderEnabled) .enableFlush(isFlushEnabled) .disableOutputStreamFlush(disableOutputStreamFlush) .withStreamStatistics(new AbfsOutputStreamStatisticsImpl()) @@ -137,6 +139,7 @@ public void verifyShortWriteRequest() throws Exception { true, false, false, + true, client, PATH, new 
TracingContext(abfsConf.getClientCorrelationId(), "test-fs-id", @@ -157,9 +160,9 @@ public void verifyShortWriteRequest() throws Exception { out.hsync(); AppendRequestParameters firstReqParameters = new AppendRequestParameters( - 0, 0, WRITE_SIZE, APPEND_MODE, false, null); + 0, 0, WRITE_SIZE, APPEND_MODE, false, null, true); AppendRequestParameters secondReqParameters = new AppendRequestParameters( - WRITE_SIZE, 0, 2 * WRITE_SIZE, APPEND_MODE, false, null); + WRITE_SIZE, 0, 2 * WRITE_SIZE, APPEND_MODE, false, null, true); verify(client, times(1)).append( eq(PATH), any(byte[].class), refEq(firstReqParameters), any(), @@ -196,6 +199,7 @@ public void verifyWriteRequest() throws Exception { true, false, false, + true, client, PATH, tracingContext, @@ -209,9 +213,9 @@ public void verifyWriteRequest() throws Exception { out.close(); AppendRequestParameters firstReqParameters = new AppendRequestParameters( - 0, 0, BUFFER_SIZE, APPEND_MODE, false, null); + 0, 0, BUFFER_SIZE, APPEND_MODE, false, null, true); AppendRequestParameters secondReqParameters = new AppendRequestParameters( - BUFFER_SIZE, 0, 5*WRITE_SIZE-BUFFER_SIZE, APPEND_MODE, false, null); + BUFFER_SIZE, 0, 5*WRITE_SIZE-BUFFER_SIZE, APPEND_MODE, false, null, true); verify(client, times(1)).append( eq(PATH), any(byte[].class), refEq(firstReqParameters), any(), @@ -269,6 +273,7 @@ public void verifyWriteRequestOfBufferSizeAndClose() throws Exception { true, false, false, + true, client, PATH, tracingContext, @@ -282,9 +287,9 @@ public void verifyWriteRequestOfBufferSizeAndClose() throws Exception { out.close(); AppendRequestParameters firstReqParameters = new AppendRequestParameters( - 0, 0, BUFFER_SIZE, APPEND_MODE, false, null); + 0, 0, BUFFER_SIZE, APPEND_MODE, false, null, true); AppendRequestParameters secondReqParameters = new AppendRequestParameters( - BUFFER_SIZE, 0, BUFFER_SIZE, APPEND_MODE, false, null); + BUFFER_SIZE, 0, BUFFER_SIZE, APPEND_MODE, false, null, true); verify(client, times(1)).append( 
eq(PATH), any(byte[].class), refEq(firstReqParameters), any(), any(TracingContext.class)); @@ -339,6 +344,7 @@ public void verifyWriteRequestOfBufferSize() throws Exception { true, false, false, + true, client, PATH, new TracingContext(abfsConf.getClientCorrelationId(), "test-fs-id", @@ -354,9 +360,9 @@ public void verifyWriteRequestOfBufferSize() throws Exception { Thread.sleep(1000); AppendRequestParameters firstReqParameters = new AppendRequestParameters( - 0, 0, BUFFER_SIZE, APPEND_MODE, false, null); + 0, 0, BUFFER_SIZE, APPEND_MODE, false, null, true); AppendRequestParameters secondReqParameters = new AppendRequestParameters( - BUFFER_SIZE, 0, BUFFER_SIZE, APPEND_MODE, false, null); + BUFFER_SIZE, 0, BUFFER_SIZE, APPEND_MODE, false, null, true); verify(client, times(1)).append( eq(PATH), any(byte[].class), refEq(firstReqParameters), any(), any(TracingContext.class)); @@ -392,6 +398,7 @@ public void verifyWriteRequestOfBufferSizeWithAppendBlob() throws Exception { true, false, true, + true, client, PATH, new TracingContext(abfsConf.getClientCorrelationId(), "test-fs-id", @@ -407,9 +414,9 @@ public void verifyWriteRequestOfBufferSizeWithAppendBlob() throws Exception { Thread.sleep(1000); AppendRequestParameters firstReqParameters = new AppendRequestParameters( - 0, 0, BUFFER_SIZE, APPEND_MODE, true, null); + 0, 0, BUFFER_SIZE, APPEND_MODE, true, null, true); AppendRequestParameters secondReqParameters = new AppendRequestParameters( - BUFFER_SIZE, 0, BUFFER_SIZE, APPEND_MODE, true, null); + BUFFER_SIZE, 0, BUFFER_SIZE, APPEND_MODE, true, null, true); verify(client, times(1)).append( eq(PATH), any(byte[].class), refEq(firstReqParameters), any(), any(TracingContext.class)); @@ -453,6 +460,7 @@ public void verifyWriteRequestOfBufferSizeAndHFlush() throws Exception { true, false, false, + true, client, PATH, new TracingContext(abfsConf.getClientCorrelationId(), "test-fs-id", @@ -468,9 +476,9 @@ public void verifyWriteRequestOfBufferSizeAndHFlush() throws Exception { 
out.hflush(); AppendRequestParameters firstReqParameters = new AppendRequestParameters( - 0, 0, BUFFER_SIZE, APPEND_MODE, false, null); + 0, 0, BUFFER_SIZE, APPEND_MODE, false, null, true); AppendRequestParameters secondReqParameters = new AppendRequestParameters( - BUFFER_SIZE, 0, BUFFER_SIZE, APPEND_MODE, false, null); + BUFFER_SIZE, 0, BUFFER_SIZE, APPEND_MODE, false, null, true); verify(client, times(1)).append( eq(PATH), any(byte[].class), refEq(firstReqParameters), any(), any(TracingContext.class)); @@ -521,6 +529,7 @@ public void verifyWriteRequestOfBufferSizeAndFlush() throws Exception { true, false, false, + true, client, PATH, new TracingContext(abfsConf.getClientCorrelationId(), "test-fs-id", @@ -538,9 +547,9 @@ public void verifyWriteRequestOfBufferSizeAndFlush() throws Exception { Thread.sleep(1000); AppendRequestParameters firstReqParameters = new AppendRequestParameters( - 0, 0, BUFFER_SIZE, APPEND_MODE, false, null); + 0, 0, BUFFER_SIZE, APPEND_MODE, false, null, true); AppendRequestParameters secondReqParameters = new AppendRequestParameters( - BUFFER_SIZE, 0, BUFFER_SIZE, APPEND_MODE, false, null); + BUFFER_SIZE, 0, BUFFER_SIZE, APPEND_MODE, false, null, true); verify(client, times(1)).append( eq(PATH), any(byte[].class), refEq(firstReqParameters), any(), any(TracingContext.class)); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsOutputStreamBlob.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsOutputStreamBlob.java index 90e4aaa25bbd42..e6ea4e0d43b9fd 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsOutputStreamBlob.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsOutputStreamBlob.java @@ -68,6 +68,7 @@ private AbfsOutputStreamContext populateAbfsOutputStreamContext( boolean isFlushEnabled, boolean disableOutputStreamFlush, boolean isAppendBlob, + 
boolean isExpectHeaderEnabled, AbfsClient client, String path, TracingContext tracingContext, @@ -85,6 +86,7 @@ private AbfsOutputStreamContext populateAbfsOutputStreamContext( return new AbfsOutputStreamContext(2) .withWriteBufferSize(writeBufferSize) + .enableExpectHeader(isExpectHeaderEnabled) .enableFlush(isFlushEnabled) .disableOutputStreamFlush(disableOutputStreamFlush) .withStreamStatistics(new AbfsOutputStreamStatisticsImpl()) @@ -134,18 +136,19 @@ public AbfsClient getClient() throws IOException, IllegalAccessException { } public AbfsOutputStream getOutputStream(AbfsClient client, AbfsConfiguration abfsConf) throws IOException, IllegalAccessException { - AbfsOutputStream out = Mockito.spy(new AbfsOutputStream( + AbfsOutputStream out = new AbfsOutputStream( populateAbfsOutputStreamContext( BUFFER_SIZE, true, false, false, + true, client, PATH, new TracingContext(abfsConf.getClientCorrelationId(), "test-fs-id", FSOperationType.WRITE, abfsConf.getTracingHeaderFormat(), null), - createExecutorService(abfsConf)))); + createExecutorService(abfsConf))); return out; } @@ -172,9 +175,9 @@ public void verifyShortWriteRequest() throws Exception { out.hsync(); AppendRequestParameters firstReqParameters = new AppendRequestParameters( - 0, 0, WRITE_SIZE, APPEND_MODE, false, null); + 0, 0, WRITE_SIZE, APPEND_MODE, false, null, true); AppendRequestParameters secondReqParameters = new AppendRequestParameters( - WRITE_SIZE, 0, 2 * WRITE_SIZE, APPEND_MODE, false, null); + WRITE_SIZE, 0, 2 * WRITE_SIZE, APPEND_MODE, false, null, true); verify(client, times(1)).append(any(), eq(PATH), any(byte[].class), refEq(firstReqParameters), any(), @@ -204,9 +207,9 @@ public void verifyWriteRequest() throws Exception { out.close(); AppendRequestParameters firstReqParameters = new AppendRequestParameters( - 0, 0, BUFFER_SIZE, APPEND_MODE, false, null); + 0, 0, BUFFER_SIZE, APPEND_MODE, false, null, true); AppendRequestParameters secondReqParameters = new AppendRequestParameters( - 
BUFFER_SIZE, 0, 5 * WRITE_SIZE - BUFFER_SIZE, APPEND_MODE, false, null); + BUFFER_SIZE, 0, 5 * WRITE_SIZE - BUFFER_SIZE, APPEND_MODE, false, null, true); verify(client, times(1)).append(any(), eq(PATH), any(byte[].class), refEq(firstReqParameters), any(), @@ -251,9 +254,9 @@ public void verifyWriteRequestOfBufferSizeAndClose() throws Exception { out.close(); AppendRequestParameters firstReqParameters = new AppendRequestParameters( - 0, 0, BUFFER_SIZE, APPEND_MODE, false, null); + 0, 0, BUFFER_SIZE, APPEND_MODE, false, null, true); AppendRequestParameters secondReqParameters = new AppendRequestParameters( - BUFFER_SIZE, 0, BUFFER_SIZE, APPEND_MODE, false, null); + BUFFER_SIZE, 0, BUFFER_SIZE, APPEND_MODE, false, null, true); verify(client, times(1)).append(any(), eq(PATH), any(byte[].class), refEq(firstReqParameters), any(), any(TracingContext.class), any()); @@ -294,9 +297,9 @@ public void verifyWriteRequestOfBufferSize() throws Exception { Thread.sleep(1000); AppendRequestParameters firstReqParameters = new AppendRequestParameters( - 0, 0, BUFFER_SIZE, APPEND_MODE, false, null); + 0, 0, BUFFER_SIZE, APPEND_MODE, false, null, true); AppendRequestParameters secondReqParameters = new AppendRequestParameters( - BUFFER_SIZE, 0, BUFFER_SIZE, APPEND_MODE, false, null); + BUFFER_SIZE, 0, BUFFER_SIZE, APPEND_MODE, false, null, true); verify(client, times(1)).append(any(), eq(PATH), any(byte[].class), refEq(firstReqParameters), any(), any(TracingContext.class), any()); @@ -320,6 +323,7 @@ public void verifyWriteRequestOfBufferSizeWithAppendBlob() throws Exception { true, false, true, + true, client, PATH, new TracingContext(abfsConf.getClientCorrelationId(), "test-fs-id", @@ -345,7 +349,7 @@ public void verifyWriteRequestOfBufferSizeWithAppendBlob() throws Exception { @Test public void verifyWriteRequestOfBufferSizeAndHFlush() throws Exception { AbfsClient client = getClient(); - AbfsOutputStream out = Mockito.spy(getOutputStream(client, getConf())); + AbfsOutputStream out 
= getOutputStream(client, getConf()); final byte[] b = new byte[BUFFER_SIZE]; new Random().nextBytes(b); @@ -356,9 +360,9 @@ public void verifyWriteRequestOfBufferSizeAndHFlush() throws Exception { out.hflush(); AppendRequestParameters firstReqParameters = new AppendRequestParameters( - 0, 0, BUFFER_SIZE, APPEND_MODE, false, null); + 0, 0, BUFFER_SIZE, APPEND_MODE, false, null, true); AppendRequestParameters secondReqParameters = new AppendRequestParameters( - BUFFER_SIZE, 0, BUFFER_SIZE, APPEND_MODE, false, null); + BUFFER_SIZE, 0, BUFFER_SIZE, APPEND_MODE, false, null, true); verify(client, times(1)).append(any(), eq(PATH), any(byte[].class), refEq(firstReqParameters), any(), any(TracingContext.class), any()); @@ -401,9 +405,9 @@ public void verifyWriteRequestOfBufferSizeAndFlush() throws Exception { Thread.sleep(1000); AppendRequestParameters firstReqParameters = new AppendRequestParameters( - 0, 0, BUFFER_SIZE, APPEND_MODE, false, null); + 0, 0, BUFFER_SIZE, APPEND_MODE, false, null, true); AppendRequestParameters secondReqParameters = new AppendRequestParameters( - BUFFER_SIZE, 0, BUFFER_SIZE, APPEND_MODE, false, null); + BUFFER_SIZE, 0, BUFFER_SIZE, APPEND_MODE, false, null, true); verify(client, times(1)).append(any(), eq(PATH), any(byte[].class), refEq(firstReqParameters), any(), any(TracingContext.class), any()); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestExponentialRetryPolicy.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestExponentialRetryPolicy.java index 0f8dc55aa14a4c..511e110c2887c8 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestExponentialRetryPolicy.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestExponentialRetryPolicy.java @@ -18,20 +18,38 @@ package org.apache.hadoop.fs.azurebfs.services; +import static java.net.HttpURLConnection.HTTP_INTERNAL_ERROR; +import 
static org.apache.hadoop.fs.azure.integration.AzureTestConstants.TEST_CONFIGURATION_FILE_NAME; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_BACKOFF_INTERVAL; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_MAX_BACKOFF_INTERVAL; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_MAX_IO_RETRIES; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_MIN_BACKOFF_INTERVAL; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ACCOUNT_LEVEL_THROTTLING_ENABLED; +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.MIN_BUFFER_SIZE; +import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.FS_AZURE_ABFS_ACCOUNT1_NAME; +import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.FS_AZURE_ACCOUNT_NAME; +import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.FS_AZURE_ENABLE_AUTOTHROTTLING; +import static org.junit.Assume.assumeTrue; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; +import java.net.URI; import java.util.Random; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem; +import org.assertj.core.api.Assertions; import org.junit.Assert; +import org.junit.Assume; import org.junit.Test; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; import org.apache.hadoop.fs.azurebfs.AbstractAbfsIntegrationTest; +import org.mockito.Mockito; /** * Unit test TestExponentialRetryPolicy. 
@@ -41,6 +59,9 @@ public class TestExponentialRetryPolicy extends AbstractAbfsIntegrationTest { private final int noRetryCount = 0; private final int retryCount = new Random().nextInt(maxRetryCount); private final int retryCountBeyondMax = maxRetryCount + 1; + private static final String TEST_PATH = "/testfile"; + private static final double MULTIPLYING_FACTOR = 1.5; + private static final int ANALYSIS_PERIOD = 10000; public TestExponentialRetryPolicy() throws Exception { @@ -67,6 +88,172 @@ public void testDefaultMaxIORetryCount() throws Exception { testMaxIOConfig(abfsConfig); } + @Test + public void testThrottlingIntercept() throws Exception { + AzureBlobFileSystem fs = getFileSystem(); + final Configuration configuration = new Configuration(); + configuration.addResource(TEST_CONFIGURATION_FILE_NAME); + configuration.setBoolean(FS_AZURE_ENABLE_AUTOTHROTTLING, false); + + // On disabling throttling AbfsNoOpThrottlingIntercept object is returned + AbfsConfiguration abfsConfiguration = new AbfsConfiguration(configuration, + "dummy.dfs.core.windows.net"); + AbfsThrottlingIntercept intercept; + AbfsClient abfsClient = ITestAbfsClient.createTestClientFromCurrentContext(getClient(fs), abfsConfiguration); + intercept = abfsClient.getIntercept(); + Assertions.assertThat(intercept) + .describedAs("AbfsNoOpThrottlingIntercept instance expected") + .isInstanceOf(AbfsNoOpThrottlingIntercept.class); + + configuration.setBoolean(FS_AZURE_ENABLE_AUTOTHROTTLING, true); + configuration.setBoolean(FS_AZURE_ACCOUNT_LEVEL_THROTTLING_ENABLED, true); + // On enabling throttling AbfsClientThrottlingIntercept object is returned + AbfsConfiguration abfsConfiguration1 = new AbfsConfiguration(configuration, + "dummy1.dfs.core.windows.net"); + AbfsClient abfsClient1 = ITestAbfsClient.createTestClientFromCurrentContext(getClient(fs), abfsConfiguration1); + intercept = abfsClient1.getIntercept(); + Assertions.assertThat(intercept) + .describedAs("AbfsClientThrottlingIntercept instance
expected") + .isInstanceOf(AbfsClientThrottlingIntercept.class); + } + + @Test + public void testCreateMultipleAccountThrottling() throws Exception { + Configuration config = new Configuration(getRawConfiguration()); + String accountName = config.get(FS_AZURE_ACCOUNT_NAME); + if (accountName == null) { + // check if accountName is set using different config key + accountName = config.get(FS_AZURE_ABFS_ACCOUNT1_NAME); + } + assumeTrue("Not set: " + FS_AZURE_ABFS_ACCOUNT1_NAME, + accountName != null && !accountName.isEmpty()); + + Configuration rawConfig1 = new Configuration(); + rawConfig1.addResource(TEST_CONFIGURATION_FILE_NAME); + + AbfsRestOperation successOp = mock(AbfsRestOperation.class); + AbfsHttpOperation http500Op = mock(AbfsHttpOperation.class); + when(http500Op.getStatusCode()).thenReturn(HTTP_INTERNAL_ERROR); + when(successOp.getResult()).thenReturn(http500Op); + + AbfsConfiguration configuration = Mockito.mock(AbfsConfiguration.class); + when(configuration.getAnalysisPeriod()).thenReturn(ANALYSIS_PERIOD); + when(configuration.isAutoThrottlingEnabled()).thenReturn(true); + when(configuration.accountThrottlingEnabled()).thenReturn(false); + + AbfsThrottlingIntercept instance1 = AbfsThrottlingInterceptFactory.getInstance(accountName, configuration); + String accountName1 = config.get(FS_AZURE_ABFS_ACCOUNT1_NAME); + + assumeTrue("Not set: " + FS_AZURE_ABFS_ACCOUNT1_NAME, + accountName1 != null && !accountName1.isEmpty()); + + AbfsThrottlingIntercept instance2 = AbfsThrottlingInterceptFactory.getInstance(accountName1, configuration); + //if singleton is enabled, for different accounts both the instances should return same value + Assertions.assertThat(instance1) + .describedAs( + "if singleton is enabled, for different accounts both the instances should return same value") + .isEqualTo(instance2); + + when(configuration.accountThrottlingEnabled()).thenReturn(true); + AbfsThrottlingIntercept instance3 = 
AbfsThrottlingInterceptFactory.getInstance(accountName, configuration); + AbfsThrottlingIntercept instance4 = AbfsThrottlingInterceptFactory.getInstance(accountName1, configuration); + AbfsThrottlingIntercept instance5 = AbfsThrottlingInterceptFactory.getInstance(accountName, configuration); + //if singleton is not enabled, for different accounts instances should return different value + Assertions.assertThat(instance3) + .describedAs( + "iff singleton is not enabled, for different accounts instances should return different value") + .isNotEqualTo(instance4); + + //if singleton is not enabled, for same accounts instances should return same value + Assertions.assertThat(instance3) + .describedAs( + "if singleton is not enabled, for same accounts instances should return same value") + .isEqualTo(instance5); + } + + @Test + public void testOperationOnAccountIdle() throws Exception { + //Get the filesystem. + AzureBlobFileSystem fs = getFileSystem(); + AbfsClient client = getClient(fs); + AbfsConfiguration configuration1 = client.getAbfsConfiguration(); + Assume.assumeTrue(configuration1.isAutoThrottlingEnabled()); + Assume.assumeTrue(configuration1.accountThrottlingEnabled()); + + AbfsClientThrottlingIntercept accountIntercept + = (AbfsClientThrottlingIntercept) client.getIntercept(); + final byte[] b = new byte[2 * MIN_BUFFER_SIZE]; + new Random().nextBytes(b); + + Path testPath = path(TEST_PATH); + + //Do an operation on the filesystem. + try (FSDataOutputStream stream = fs.create(testPath)) { + stream.write(b); + } + + //Don't perform any operation on the account. + int sleepTime = (int) ((getAbfsConfig().getAccountOperationIdleTimeout()) * MULTIPLYING_FACTOR); + Thread.sleep(sleepTime); + + try (FSDataInputStream streamRead = fs.open(testPath)) { + streamRead.read(b); + } + + //Perform operations on another account. 
+ AzureBlobFileSystem fs1 = new AzureBlobFileSystem(); + Configuration config = new Configuration(getRawConfiguration()); + String accountName1 = config.get(FS_AZURE_ABFS_ACCOUNT1_NAME); + assumeTrue("Not set: " + FS_AZURE_ABFS_ACCOUNT1_NAME, + accountName1 != null && !accountName1.isEmpty()); + final String abfsUrl1 = this.getFileSystemName() + "12" + "@" + accountName1; + URI defaultUri1 = null; + defaultUri1 = new URI("abfss", abfsUrl1, null, null, null); + fs1.initialize(defaultUri1, getRawConfiguration()); + AbfsClient client1 = getClient(fs1); + AbfsClientThrottlingIntercept accountIntercept1 + = (AbfsClientThrottlingIntercept) client1.getIntercept(); + try (FSDataOutputStream stream1 = fs1.create(testPath)) { + stream1.write(b); + } + + //Verify the write analyzer for first account is idle but the read analyzer is not idle. + Assertions.assertThat(accountIntercept.getWriteThrottler() + .getIsOperationOnAccountIdle() + .get()) + .describedAs("Write analyzer for first account should be idle the first time") + .isTrue(); + + Assertions.assertThat( + accountIntercept.getReadThrottler() + .getIsOperationOnAccountIdle() + .get()) + .describedAs("Read analyzer for first account should not be idle") + .isFalse(); + + //Verify the write analyzer for second account is not idle. + Assertions.assertThat( + accountIntercept1.getWriteThrottler() + .getIsOperationOnAccountIdle() + .get()) + .describedAs("Write analyzer for second account should not be idle") + .isFalse(); + + //Again perform an operation on the first account. + try (FSDataOutputStream stream2 = fs.create(testPath)) { + stream2.write(b); + } + + //Verify the write analyzer on first account is not idle. 
+ Assertions.assertThat( + accountIntercept.getWriteThrottler() + .getIsOperationOnAccountIdle() + .get()) + .describedAs( + "Write analyzer for first account should not be idle second time") + .isFalse(); + } + @Test public void testAbfsConfigConstructor() throws Exception { // Ensure we choose expected values that are not defaults diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/DelegationSASGenerator.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/DelegationSASGenerator.java index 71e0326f79273d..3b7e3cacd0ec15 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/DelegationSASGenerator.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/DelegationSASGenerator.java @@ -61,6 +61,7 @@ public String getDelegationSAS(String accountName, String containerName, String case SASTokenProvider.WRITE_OPERATION: case SASTokenProvider.SET_PROPERTIES_OPERATION: case SASTokenProvider.LEASE_OPERATION: + case SASTokenProvider.SET_BLOB_METADATA_OPERATION: sp = "w"; break; case SASTokenProvider.DELETE_OPERATION: @@ -96,6 +97,7 @@ public String getDelegationSAS(String accountName, String containerName, String case SASTokenProvider.GET_BLOCK_LIST: case SASTokenProvider.GET_BLOB_PROPERTIES_OPERATION: case SASTokenProvider.GET_CONTAINER_PROPERTIES_OPERATION: + case SASTokenProvider.GET_BLOB_METADATA_OPERATION: case SASTokenProvider.READ_OPERATION: sp = "r"; break;