This repository was archived by the owner on Jun 3, 2025. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 192
[Pipeline Refactor] async #1380
Merged
Merged
Changes from all commits
Commits
Show all changes
39 commits
Select commit
Hold shift + click to select a range
6c75b65
initial functionality and working example with image classification
dsikka 75de103
remove testing image
dsikka aa5d885
rebase fixes
dsikka 8cc63ee
initial functionality and working example with image classification
dsikka ab2b711
text gen
dsikka 00cb85e
updates func
dsikka 5cf4b3f
prompt inference, initial functionality
dsikka 1b951dc
remove image; update state docstring
dsikka 809cfc1
Fix typo
dsikka 6336d8e
add todo for split/join
dsikka 3f2193d
remove context, clean-up args, remove prefill_preprocess_operaator
dsikka 216ceea
fix docstrings
dsikka 02b74d4
initial functionality and working example with image classification
dsikka 37f090c
updates func
dsikka 7bd25da
prompt inference, initial functionality
dsikka 98bc123
finish generation operators and update routes
dsikka ef8277b
further breakdown operators
dsikka 664abdd
add operators
dsikka 754ce2c
fix can_operate condition
dsikka ed7cd58
update can_operate to not rely on the inference_state
dsikka 5d56421
rebase + update
dsikka 5086e1f
fix condition
dsikka 44156e6
async initial functionality
dsikka 740eb67
fix capacity settting again
dsikka c991c30
Merge branch 'v2' into features/v2/generation
dsikka 473a691
Merge branch 'features/v2/generation' into features/v2/async
dsikka 8ed2a64
Merge branch 'v2' into features/v2/async
dsikka c2666dd
add blocking
dsikka 59f69d3
more testing
dsikka 6276a77
Merge branch 'v2' into features/v2/async
dsikka 235178b
update to use split/join
dsikka fe15318
fix
dsikka 1645aa8
rebase fix
dsikka a7a003c
remove index
dsikka bafdd24
change event loop
dsikka b981371
Merge branch 'v2' into features/v2/async
dsikka 9a15bf6
rebase fix
dsikka 8f7fbd6
update async run to use new operator scheduling properly
dsikka 0d82e49
Merge branch 'v2' into features/v2/async
dsikka File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -12,9 +12,9 @@ | |
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
|
|
||
|
|
||
| import asyncio | ||
| import copy | ||
| from typing import Any, Dict, List, Union | ||
| from typing import Any, Dict, List, Optional, Union | ||
|
|
||
| from deepsparse.v2.operators import EngineOperator, Operator | ||
| from deepsparse.v2.routers import Router | ||
|
|
@@ -68,10 +68,7 @@ def __init__( | |
| self._scheduler_group = SchedulerGroup(self.schedulers) | ||
|
|
||
| def _run_next( | ||
| self, | ||
| inp: Any, | ||
| inference_state: InferenceState, | ||
| next_step: str, | ||
| self, inp: Any, inference_state: InferenceState, next_step: str, **kwargs | ||
| ): | ||
| if ( | ||
| isinstance(self.ops[next_step], EngineOperator) | ||
|
|
@@ -88,10 +85,14 @@ def _run_next( | |
| inp=inp, | ||
| pipeline_state=self.pipeline_state, | ||
| inference_state=inference_state, | ||
| **kwargs, | ||
| ) | ||
|
|
||
| def _run_sub_graphs( | ||
| self, sub_graph_inputs: List[Any], sub_graphs: List[SubGraph] | ||
| async def _run_sub_graphs( | ||
| self, | ||
| sub_graph_inputs: List[Any], | ||
| sub_graphs: List[SubGraph], | ||
| loop: Optional[asyncio.AbstractEventLoop] = None, | ||
| ) -> List[Any]: | ||
| """ | ||
| Run a list of sub_graphs asynchronously. Polls to identify the sub graph that is | ||
|
|
@@ -111,14 +112,16 @@ def _run_sub_graphs( | |
| """ | ||
| for i in range(len(sub_graphs)): | ||
| sub_graphs[i].output = self._run_next( | ||
| sub_graph_inputs[i], sub_graphs[i].inf, sub_graphs[i].step | ||
| sub_graph_inputs[i], sub_graphs[i].inf, sub_graphs[i].step, loop=loop | ||
| ) | ||
|
|
||
| # Execute all sub graphs until all graphs have been completed. | ||
| while any(not x.completed for x in sub_graphs): | ||
| for sub_graph in sub_graphs: | ||
| if not sub_graph.completed: | ||
| # get the result for the completed operator; resolve its output | ||
| if isinstance(sub_graph.output, asyncio.Future): | ||
| await sub_graph.output | ||
| operator_output = sub_graph.output.result() | ||
| operator_output = sub_graph.parse_output(operator_output) | ||
|
|
||
|
|
@@ -141,18 +144,80 @@ def _run_sub_graphs( | |
| inp=operator_output, | ||
| inference_state=sub_graph.inf, | ||
| next_step=next_step, | ||
| loop=loop, | ||
| ) | ||
|
|
||
| return [x.output for x in sub_graphs] | ||
|
|
||
| def _apply_split(self, inp: Any, inference_state: InferenceState): | ||
| async def run_async(self, *args, inference_state: InferenceState, **kwargs): | ||
| """ | ||
| Split inputs using the pipeline's expand_inputs function. Inputs are split | ||
| into a batch size of one when a SPLIT_ROUTE node is found in a given pipeline's | ||
| provided router. The split batches are run asynchronously and then joined when | ||
| a JOIN_ROUTE node is found, using the pipeline's condense_inputs function. | ||
| Run through the operators using the provided router and scheduler. | ||
| The input to a given operator is the output of the previous operator. | ||
|
|
||
| :param inference_state: inference_state for the pipeline. | ||
| :param pipeline_state: pipeline_state for the pipeline. The values in the state | ||
| are created during pipeline creation and are read-only during inference. | ||
| """ | ||
| loop = asyncio.get_running_loop() | ||
|
|
||
| next_step = self.router.START_ROUTE | ||
| operator_output = None | ||
|
|
||
| while next_step != self.router.END_ROUTE: | ||
| # Either a dictionary key or valid index | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think this comment refers to |
||
|
|
||
| if next_step == self.router.SPLIT_ROUTE: | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This code flow is a bit funky — what do you think about making this an `if`/`elif`/`else`? I think that would look like this: async def run_async(self, *args, inference_state: InferenceState, **kwargs):
"""
Run through the operators using the provided router and scheduler.
The input to a given operator is the output of the previous operator.
:param inference_state: inference_state for the pipeline.
:param pipeline_state: pipeline_state for the pipeline. The values in the state
are created during pipeline creation and are read-only during inference.
"""
loop = asyncio.get_running_loop()
next_step = self.router.START_ROUTE
operator_output = None
while next_step != self.router.END_ROUTE:
# Either a dictionary key or valid index
if next_step == self.router.START_ROUTE:
outputs = run_func(
*args,
func=self._scheduler_group.submit,
operator=self.ops[next_step],
inference_state=inference_state,
pipeline_state=self.pipeline_state,
loop=loop,
**kwargs,
)
await outputs
operator_output = outputs.result()
elif next_step == self.router.SPLIT_ROUTE:
if operator_output is None:
raise ValueError(
f"{self.router.SPLIT_ROUTE} should appear after "
f"{self.ROUTER.START_ROUTE}"
)
operator_output = await self._apply_split(
operator_output, inference_state, loop=loop
)
else:
outputs = self._run_next(
inp=operator_output,
next_step=next_step,
inference_state=inference_state,
loop=loop,
)
await outputs
operator_output = outputs.result()
if next_step == self.router.SPLIT_ROUTE:
next_step = self.router.route[self.router.JOIN_ROUTE]
continue
if isinstance(operator_output, tuple):
state_update = operator_output[-1]
operator_output = operator_output[0]
next_step = self.router.next(next_step, self.ops, operator_output)
if state_update:
inference_state.update_state(state_update)
return operator_output Maybe a little easier to reason about, but maybe not. |
||
| if operator_output is None: | ||
| raise ValueError( | ||
| f"{self.router.SPLIT_ROUTE} should appear after " | ||
| f"{self.ROUTER.START_ROUTE}" | ||
| ) | ||
|
|
||
| operator_output = await self._apply_split( | ||
| operator_output, inference_state, loop=loop | ||
| ) | ||
| next_step = self.router.route[self.router.JOIN_ROUTE] | ||
| if next_step == self.router.END_ROUTE: | ||
| return operator_output | ||
|
|
||
| if next_step == self.router.START_ROUTE: | ||
| outputs = run_func( | ||
| *args, | ||
| func=self._scheduler_group.submit, | ||
| operator=self.ops[next_step], | ||
| inference_state=inference_state, | ||
| pipeline_state=self.pipeline_state, | ||
| loop=loop, | ||
| **kwargs, | ||
| ) | ||
| await outputs | ||
| operator_output = outputs.result() | ||
|
|
||
| else: | ||
| outputs = self._run_next( | ||
| inp=operator_output, | ||
| next_step=next_step, | ||
| inference_state=inference_state, | ||
| loop=loop, | ||
| ) | ||
| await outputs | ||
| operator_output = outputs.result() | ||
|
|
||
| if isinstance(operator_output, tuple): | ||
| state_update = operator_output[-1] | ||
| operator_output = operator_output[0] | ||
dsikka marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| next_step = self.router.next(next_step, self.ops, operator_output) | ||
| if state_update: | ||
dsikka marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| inference_state.update_state(state_update) | ||
| return operator_output | ||
|
|
||
| async def _apply_split( | ||
| self, | ||
| inp: Any, | ||
| inference_state: InferenceState, | ||
| loop: Optional[asyncio.AbstractEventLoop] = None, | ||
| ): | ||
| batches, orig_batch_size = self.expand_inputs(inp, 1) | ||
|
|
||
| # Create a list of SplitRoutes, per batch size 1 | ||
|
|
@@ -168,8 +233,8 @@ def _apply_split(self, inp: Any, inference_state: InferenceState): | |
| for i in range(len(batches)) | ||
| ] | ||
|
|
||
| outputs = self._run_sub_graphs( | ||
| sub_graph_inputs=batches, sub_graphs=split_graphs | ||
| outputs = await self._run_sub_graphs( | ||
| sub_graph_inputs=batches, sub_graphs=split_graphs, loop=loop | ||
| ) | ||
| return self.condense_inputs(outputs) | ||
|
|
||
|
|
@@ -200,7 +265,9 @@ def run( | |
| f"{self.ROUTER.START_ROUTE}" | ||
| ) | ||
|
|
||
| operator_output = self._apply_split(operator_output, inference_state) | ||
| operator_output = asyncio.run( | ||
| self._apply_split(operator_output, inference_state) | ||
| ) | ||
| next_step = self.router.route[self.router.JOIN_ROUTE] | ||
| if next_step == self.router.END_ROUTE: | ||
| return operator_output | ||
|
|
@@ -232,8 +299,10 @@ def run( | |
| end=[self.router.SPLIT_ROUTE, self.router.END_ROUTE], | ||
| ) | ||
|
|
||
| operator_output = self._run_sub_graphs( | ||
| sub_graph_inputs=[operator_output], sub_graphs=[graph] | ||
| operator_output = asyncio.run( | ||
| self._run_sub_graphs( | ||
| sub_graph_inputs=[operator_output], sub_graphs=[graph] | ||
| ) | ||
| )[0] | ||
|
|
||
| inference_state = graph.inf | ||
|
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure if there's any reason for an operator to have an output of
None, but if so, you might consider using another sentinel value here and in the check below 🤷🏻 There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I guess you could also consider adding another local variable to avoid overloading the use of
`operator_output` — something like