-
Notifications
You must be signed in to change notification settings - Fork 3.2k
[Batch] Patch TaskOperations.add_collection with convenience functionality #3217
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 3 commits
Commits
Show all changes
7 commits
Select commit
Hold shift + click to select a range
5d90777
patch file for bulk add task
ashirey-msft 70e39ec
Update bulk task add to use local sdk references and added tests
bgklein dca1f06
Update error handling
bgklein 4da57a8
update doc text
bgklein 9f9277a
Merge branch 'master' into master
lmazuel bcf405f
Dynamically inject custom error into models
bgklein e04778e
Merge branch 'master' into master
lmazuel File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,19 @@ | ||
| # -------------------------------------------------------------------------------------------- | ||
| # Copyright (c) Microsoft Corporation. All rights reserved. | ||
| # Licensed under the MIT License. See License.txt in the project root for license information. | ||
| # -------------------------------------------------------------------------------------------- | ||
|
|
||
|
|
||
class CreateTasksErrorException(Exception):
    """Aggregate exception containing details for any failures from a task add operation.

    :param str message: Error message describing exit reason
    :param pending_task_list: List of tasks remaining to be submitted.
    :type pending_task_list: list[~TaskAddParameter]
    :param failure_tasks: List of tasks which failed to add.
    :type failure_tasks: list[~TaskAddResult]
    :param errors: List of unknown errors forcing early termination.
    :type errors: list[~Exception]
    """
    def __init__(self, message, pending_task_list=None, failure_tasks=None, errors=None):
        # Propagate the message to the base Exception so str(exc) works.
        super(CreateTasksErrorException, self).__init__(message)
        self.message = message
        # BUGFIX: all three collection parameters default to None, and
        # list(None) raises TypeError - fall back to empty lists so the
        # attributes are always iterable.
        self.pending_tasks = list(pending_task_list) if pending_task_list else []
        self.failure_tasks = list(failure_tasks) if failure_tasks else []
        self.errors = list(errors) if errors else []
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,288 @@ | ||
| import collections | ||
| import logging | ||
| import threading | ||
| import types | ||
|
|
||
| from ..models import BatchErrorException, TaskAddCollectionResult, TaskAddStatus, CreateTasksErrorException | ||
| from ..operations.task_operations import TaskOperations | ||
|
|
||
| MAX_TASKS_PER_REQUEST = 100 | ||
| _LOGGER = logging.getLogger(__name__) | ||
|
|
||
class _TaskWorkflowManager(object):
    """Worker class for one add_collection request.

    Splits the pending tasks into chunks of at most ``MAX_TASKS_PER_REQUEST``,
    submits each chunk, and handles retries for server-side errors and for
    request bodies that exceed the service size limit.

    :param client: Parent object which instantiated this manager; its
     transport/serialization internals are reused to build a local
     :class:`TaskOperations` instance.
    :param job_id: The ID of the job to which the task collection is to be
     added.
    :type job_id: str
    :param tasks_to_add: The collection of tasks to add.
    :type tasks_to_add: list of :class:`TaskAddParameter
     <azure.batch.models.TaskAddParameter>`
    :param task_add_collection_options: Additional parameters for the
     operation
    :type task_add_collection_options: :class:`TaskAddCollectionOptions
     <azure.batch.models.TaskAddCollectionOptions>`
    :param dict custom_headers: headers that will be added to the request
    :param bool raw: returns the direct response alongside the
     deserialized response
    """

    def __init__(
            self,
            client,
            job_id,
            tasks_to_add,
            task_add_collection_options=None,
            custom_headers=None,
            raw=False,
            **kwargs):
        # Append operations thread safe - only read once all threads have completed.
        # List of tasks which failed to add due to a returned client error.
        self._failure_tasks = collections.deque()
        # List of unknown exceptions which occurred during requests.
        self._errors = collections.deque()

        # Synchronized through lock variables
        self.error = None  # Only written once all threads have completed
        self._max_tasks_per_request = MAX_TASKS_PER_REQUEST
        self._tasks_to_add = collections.deque(tasks_to_add)

        self._error_lock = threading.Lock()
        self._max_tasks_lock = threading.Lock()
        self._pending_queue_lock = threading.Lock()

        # Variables to be used for task add_collection requests
        self._client = TaskOperations(
            client._client, client.config, client._serialize, client._deserialize)
        self._job_id = job_id
        self._task_add_collection_options = task_add_collection_options
        self._custom_headers = custom_headers
        self._raw = raw
        self._kwargs = dict(**kwargs)

    def _bulk_add_tasks(self, results_queue, chunk_tasks_to_add):
        """Adds a chunk of tasks to the job.

        Retry chunk if body exceeds the maximum request size and retry tasks
        if failed due to server errors.

        :param results_queue: Queue to place the return value of the request
        :type results_queue: collections.deque
        :param chunk_tasks_to_add: Chunk of at most 100 tasks with retry details
        :type chunk_tasks_to_add: list[~TrackedCloudTask]
        """
        try:
            add_collection_response = self._client.add_collection(
                self._job_id,
                chunk_tasks_to_add,
                self._task_add_collection_options,
                self._custom_headers,
                self._raw)
        except BatchErrorException as e:
            # In case of a chunk exceeding the MaxMessageSize split chunk in half
            # and resubmit smaller chunk requests
            # TODO: Replace string with constant variable once available in SDK
            if e.error.code == "RequestBodyTooLarge":  # pylint: disable=no-member
                # In this case the task is misbehaved and will not be able to be added due to:
                # 1) The task exceeding the max message size
                # 2) A single cell of the task exceeds the per-cell limit, or
                # 3) Sum of all cells exceeds max row limit
                if len(chunk_tasks_to_add) == 1:
                    failed_task = chunk_tasks_to_add.pop()
                    self._errors.appendleft(e)
                    _LOGGER.error("Failed to add task with ID %s due to the body"
                                  " exceeding the maximum request size", failed_task.id)
                else:
                    # Assumption: Tasks are relatively close in size therefore if one batch exceeds size limit
                    # we should decrease the initial task collection size to avoid repeating the error
                    # Midpoint is lower bounded by 1 due to above base case
                    midpoint = int(len(chunk_tasks_to_add) / 2)
                    # Restrict one thread at a time to do this compare and set,
                    # therefore forcing max_tasks_per_request to be strictly decreasing
                    with self._max_tasks_lock:
                        if midpoint < self._max_tasks_per_request:
                            # BUGFIX: log the old value before overwriting it -
                            # the original assigned first and then logged
                            # "reduced from X to X".
                            _LOGGER.info("Amount of tasks per request reduced from %s to %s due to the"
                                         " request body being too large",
                                         str(self._max_tasks_per_request),
                                         str(midpoint))
                            self._max_tasks_per_request = midpoint

                    # Not the most efficient solution for all cases, but the goal of this is to handle this
                    # exception and have it work in all cases where tasks are well behaved.
                    # Behavior retries as a smaller chunk and appends extra
                    # tasks to the queue to be picked up by another thread.
                    self._tasks_to_add.extendleft(chunk_tasks_to_add[midpoint:])
                    self._bulk_add_tasks(results_queue, chunk_tasks_to_add[:midpoint])
            # Retry server side errors
            elif 500 <= e.response.status_code <= 599:
                self._tasks_to_add.extendleft(chunk_tasks_to_add)
            else:
                # Re-add to pending queue as unknown status / don't have result
                self._tasks_to_add.extendleft(chunk_tasks_to_add)
                # Unknown state - don't know if tasks failed to add or were successful
                self._errors.appendleft(e)
        except Exception as e:  # pylint: disable=broad-except
            # Re-add to pending queue as unknown status / don't have result
            self._tasks_to_add.extendleft(chunk_tasks_to_add)
            # Unknown state - don't know if tasks failed to add or were successful
            self._errors.appendleft(e)
        else:
            # When raw=True the service result is wrapped; unwrap if present.
            try:
                add_collection_response = add_collection_response.output
            except AttributeError:
                pass

            for task_result in add_collection_response.value:  # pylint: disable=no-member
                if task_result.status == TaskAddStatus.server_error:
                    # Server error will be retried
                    with self._pending_queue_lock:
                        for task in chunk_tasks_to_add:
                            if task.id == task_result.task_id:
                                self._tasks_to_add.appendleft(task)
                elif (task_result.status == TaskAddStatus.client_error
                        and not task_result.error.code == "TaskExists"):
                    # Client error will be recorded unless the task already exists
                    self._failure_tasks.appendleft(task_result)
                else:
                    results_queue.appendleft(task_result)

    def task_collection_thread_handler(self, results_queue):
        """Main method for worker to run.

        Pops a chunk of tasks off the collection of pending tasks to be added
        and submits them to be added.

        :param collections.deque results_queue: Queue for worker to output results to
        """
        # Add tasks until either we run out or we run into an unexpected error
        while self._tasks_to_add and not self._errors:
            max_tasks = self._max_tasks_per_request  # local copy
            chunk_tasks_to_add = []
            with self._pending_queue_lock:
                while len(chunk_tasks_to_add) < max_tasks and self._tasks_to_add:
                    chunk_tasks_to_add.append(self._tasks_to_add.pop())

            if chunk_tasks_to_add:
                self._bulk_add_tasks(results_queue, chunk_tasks_to_add)

        # Only define error if all threads have finished and there were failures
        with self._error_lock:
            if threading.active_count() == 1 and (self._failure_tasks or self._errors):
                # BUGFIX: pass by keyword - the original positional call put
                # the failed tasks into pending_task_list and the pending
                # tasks into failure_tasks.
                self.error = CreateTasksErrorException(
                    "One or more tasks failed to be added",
                    pending_task_list=self._tasks_to_add,
                    failure_tasks=self._failure_tasks,
                    errors=self._errors)
|
|
||
|
|
||
| def _handle_output(results_queue): | ||
| """Scan output for exceptions | ||
|
|
||
| If there is an output from an add task collection call add it to the results. | ||
|
|
||
| :param results_queue: Queue containing results of attempted add_collection's | ||
| :type results_queue: collections.deque | ||
| :return: list of TaskAddResults | ||
| :rtype: list[~TaskAddResult] | ||
| """ | ||
| results = [] | ||
| while results_queue: | ||
| queue_item = results_queue.pop() | ||
| results.append(queue_item) | ||
| return results | ||
|
|
||
def patch_client(client):
    """Patch a Batch service client in place with the convenience behavior.

    Rebinds ``client.task.add_collection`` to :func:`bulk_add_collection`
    (bound to ``client.task``), which chunks, retries, and optionally
    parallelizes task submission.

    :param client: The Batch service client to patch; presumably a
     ~azure.batch.BatchServiceClient - confirm against callers.
    """
    client.task.add_collection = types.MethodType(bulk_add_collection, client.task)
|
|
||
def bulk_add_collection(
        client,
        job_id,
        value,
        task_add_collection_options=None,
        custom_headers=None,
        raw=False,
        threads=0,
        **operation_config):
    """Adds a collection of tasks to the specified job.

    Note that each task must have a unique ID. The Batch service may not
    return the results for each task in the same order the tasks were
    submitted in this request. If the server times out or the connection is
    closed during the request, the request may have been partially or fully
    processed, or not at all. In such cases, the user should re-issue the
    request. Note that it is up to the user to correctly handle failures
    when re-issuing a request. For example, you should use the same task
    IDs during a retry so that if the prior operation succeeded, the retry
    will not create extra tasks unexpectedly. If the response contains any
    tasks which failed to add, a client can retry the request. In a retry,
    it is most efficient to resubmit only tasks that failed to add, and to
    omit tasks that were successfully added on the first attempt. The
    maximum lifetime of a task from addition to completion is 7 days. If a
    task has not completed within 7 days of being added it will be
    terminated by the Batch service and left in whatever state it was in at
    that time.

    :param job_id: The ID of the job to which the task collection is to be
     added.
    :type job_id: str
    :param value: The collection of tasks to add. The total serialized
     size of this collection must be less than 4MB. If it is greater than
     4MB (for example if each task has 100's of resource files or
     environment variables), the request will fail with code
     'RequestBodyTooLarge' and should be retried again with fewer tasks.
    :type value: list of :class:`TaskAddParameter
     <azure.batch.models.TaskAddParameter>`
    :param task_add_collection_options: Additional parameters for the
     operation
    :type task_add_collection_options: :class:`TaskAddCollectionOptions
     <azure.batch.models.TaskAddCollectionOptions>`
    :param dict custom_headers: headers that will be added to the request
    :param bool raw: returns the direct response alongside the
     deserialized response
    :param int threads: number of threads to use in parallel when adding tasks. If specified
     and greater than 0, will start additional threads to submit requests and wait for them to finish.
     Otherwise will submit add_collection requests sequentially on main thread
    :return: :class:`TaskAddCollectionResult
     <azure.batch.models.TaskAddCollectionResult>` or
     :class:`ClientRawResponse<msrest.pipeline.ClientRawResponse>` if
     raw=true
    :rtype: :class:`TaskAddCollectionResult
     <azure.batch.models.TaskAddCollectionResult>` or
     :class:`ClientRawResponse<msrest.pipeline.ClientRawResponse>`
    :raises:
     :class:`BatchErrorException<azure.batch.models.BatchErrorException>`
    """
    # deque operations (append/pop) are thread-safe, so workers can share it.
    results_queue = collections.deque()
    task_workflow_manager = _TaskWorkflowManager(
        client,
        job_id,
        value,
        task_add_collection_options,
        custom_headers,
        raw,
        **operation_config)

    if threads:
        # Multi-threaded: fan out the worker loop across `threads` threads.
        if threads < 0:
            raise ValueError("Threads must be positive or 0")

        workers = []
        for _ in range(threads):
            worker = threading.Thread(
                target=task_workflow_manager.task_collection_thread_handler,
                args=(results_queue,))
            worker.start()
            workers.append(worker)
        for worker in workers:
            worker.join()
    else:
        # Single-threaded: run the same worker loop inline on this thread.
        task_workflow_manager.task_collection_thread_handler(results_queue)

    if task_workflow_manager.error:
        raise task_workflow_manager.error  # pylint: disable=raising-bad-type
    return TaskAddCollectionResult(value=_handle_output(results_queue))
bulk_add_collection.metadata = {'url': '/jobs/{jobId}/addtaskcollection'}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -9,6 +9,10 @@ | |
| # regenerated. | ||
| # -------------------------------------------------------------------------- | ||
|
|
||
| ######DO NOT REMOVE###### | ||
|
||
| from ..custom.custom_errors import CreateTasksErrorException | ||
| ######DO NOT REMOVE###### | ||
|
|
||
| try: | ||
| from .pool_usage_metrics_py3 import PoolUsageMetrics | ||
| from .image_reference_py3 import ImageReference | ||
|
|
@@ -467,6 +471,7 @@ | |
| ) | ||
|
|
||
| __all__ = [ | ||
| 'CreateTasksErrorException', | ||
| 'PoolUsageMetrics', | ||
| 'ImageReference', | ||
| 'NodeAgentSku', | ||
|
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.