This repository was archived by the owner on Jun 3, 2025. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 192
[Pipeline Refactor] async #1380
Merged
Merged
Changes from all commits
Commits
Show all changes
39 commits
Select commit
Hold shift + click to select a range
6c75b65
initial functionality and working example with image classification
dsikka 75de103
remove testing image
dsikka aa5d885
rebase fixes
dsikka 8cc63ee
initial functionality and working example with image classification
dsikka ab2b711
text gen
dsikka 00cb85e
updates func
dsikka 5cf4b3f
prompt inference, initial functionality
dsikka 1b951dc
remove image; update state docstring
dsikka 809cfc1
Fix typo
dsikka 6336d8e
add todo for split/join
dsikka 3f2193d
remove context, clean-up args, remove prefill_preprocess_operaator
dsikka 216ceea
fix docstrings
dsikka 02b74d4
initial functionality and working example with image classification
dsikka 37f090c
updates func
dsikka 7bd25da
prompt inference, initial functionality
dsikka 98bc123
finish generation operators and update routes
dsikka ef8277b
further breakdown operators
dsikka 664abdd
add operators
dsikka 754ce2c
fix can_operate condition
dsikka ed7cd58
update can_operate to not rely on the inference_state
dsikka 5d56421
rebase + update
dsikka 5086e1f
fix condition
dsikka 44156e6
async initial functionality
dsikka 740eb67
fix capacity settting again
dsikka c991c30
Merge branch 'v2' into features/v2/generation
dsikka 473a691
Merge branch 'features/v2/generation' into features/v2/async
dsikka 8ed2a64
Merge branch 'v2' into features/v2/async
dsikka c2666dd
add blocking
dsikka 59f69d3
more testing
dsikka 6276a77
Merge branch 'v2' into features/v2/async
dsikka 235178b
update to use split/join
dsikka fe15318
fix
dsikka 1645aa8
rebase fix
dsikka a7a003c
remove index
dsikka bafdd24
change event loop
dsikka b981371
Merge branch 'v2' into features/v2/async
dsikka 9a15bf6
rebase fix
dsikka 8f7fbd6
update async run to use new operator scheduling properly
dsikka 0d82e49
Merge branch 'v2' into features/v2/async
dsikka File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -12,9 +12,9 @@ | |
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
|
|
||
|
|
||
| import asyncio | ||
| import copy | ||
| from typing import Any, Dict, List, Union | ||
| from typing import Any, Dict, List, Optional, Union | ||
|
|
||
| from deepsparse.v2.operators import EngineOperator, Operator | ||
| from deepsparse.v2.routers import Router | ||
|
|
@@ -68,10 +68,7 @@ def __init__( | |
| self._scheduler_group = SchedulerGroup(self.schedulers) | ||
|
|
||
| def _run_next( | ||
| self, | ||
| inp: Any, | ||
| inference_state: InferenceState, | ||
| next_step: str, | ||
| self, inp: Any, inference_state: InferenceState, next_step: str, **kwargs | ||
| ): | ||
| if ( | ||
| isinstance(self.ops[next_step], EngineOperator) | ||
|
|
@@ -88,10 +85,14 @@ def _run_next( | |
| inp=inp, | ||
| pipeline_state=self.pipeline_state, | ||
| inference_state=inference_state, | ||
| **kwargs, | ||
| ) | ||
|
|
||
| def _run_sub_graphs( | ||
| self, sub_graph_inputs: List[Any], sub_graphs: List[SubGraph] | ||
| async def _run_sub_graphs( | ||
| self, | ||
| sub_graph_inputs: List[Any], | ||
| sub_graphs: List[SubGraph], | ||
| loop: Optional[asyncio.AbstractEventLoop] = None, | ||
| ) -> List[Any]: | ||
| """ | ||
| Run a list of sub_graphs asynchronously. Polls to identify the sub graph that is | ||
|
|
@@ -111,14 +112,16 @@ def _run_sub_graphs( | |
| """ | ||
| for i in range(len(sub_graphs)): | ||
| sub_graphs[i].output = self._run_next( | ||
| sub_graph_inputs[i], sub_graphs[i].inf, sub_graphs[i].step | ||
| sub_graph_inputs[i], sub_graphs[i].inf, sub_graphs[i].step, loop=loop | ||
| ) | ||
|
|
||
| # Execute all sub graphs until all graphs have been completed. | ||
| while any(not x.completed for x in sub_graphs): | ||
| for sub_graph in sub_graphs: | ||
| if not sub_graph.completed: | ||
| # get the result for the completed operator; resolve its output | ||
| if isinstance(sub_graph.output, asyncio.Future): | ||
| await sub_graph.output | ||
| operator_output = sub_graph.output.result() | ||
| operator_output = sub_graph.parse_output(operator_output) | ||
|
|
||
|
|
@@ -141,18 +144,80 @@ def _run_sub_graphs( | |
| inp=operator_output, | ||
| inference_state=sub_graph.inf, | ||
| next_step=next_step, | ||
| loop=loop, | ||
| ) | ||
|
|
||
| return [x.output for x in sub_graphs] | ||
|
|
||
| def _apply_split(self, inp: Any, inference_state: InferenceState): | ||
| async def run_async(self, *args, inference_state: InferenceState, **kwargs): | ||
| """ | ||
| Split inputs using the pipeline's expand_inputs function. Inputs are split | ||
| into a batch size of one when a SPLIT_ROUTE node is found in a given pipeline's | ||
| provided router. The split batches are run asynchronously and then joined when | ||
| a JOIN_ROUTE node is found, using the pipeline's condense_inputs function. | ||
| Run through the operators using the provided router and scheduler. | ||
| The input to a given operator is the output of the previous operator. | ||
|
|
||
| :param inference_state: inference_state for the pipeline. | ||
| :param pipeline_state: pipeline_state for the pipeline. The values in the state | ||
| are created during pipeline creation and are read-only during inference. | ||
| """ | ||
| loop = asyncio.get_running_loop() | ||
|
|
||
| next_step = self.router.START_ROUTE | ||
| operator_output = None | ||
|
|
||
| while next_step != self.router.END_ROUTE: | ||
| # Either a dictionary key or valid index | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think this comment refers to |
||
|
|
||
| if next_step == self.router.SPLIT_ROUTE: | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This code flow is a bit funky — what do you think about making this an `if`/`elif`/`else`? I think that would look like this: async def run_async(self, *args, inference_state: InferenceState, **kwargs):
"""
Run through the operators using the provided router and scheduler.
The input to a given operator is the output of the previous operator.
:param inference_state: inference_state for the pipeline.
:param pipeline_state: pipeline_state for the pipeline. The values in the state
are created during pipeline creation and are read-only during inference.
"""
loop = asyncio.get_running_loop()
next_step = self.router.START_ROUTE
operator_output = None
while next_step != self.router.END_ROUTE:
# Either a dictionary key or valid index
if next_step == self.router.START_ROUTE:
outputs = run_func(
*args,
func=self._scheduler_group.submit,
operator=self.ops[next_step],
inference_state=inference_state,
pipeline_state=self.pipeline_state,
loop=loop,
**kwargs,
)
await outputs
operator_output = outputs.result()
elif next_step == self.router.SPLIT_ROUTE:
if operator_output is None:
raise ValueError(
f"{self.router.SPLIT_ROUTE} should appear after "
f"{self.ROUTER.START_ROUTE}"
)
operator_output = await self._apply_split(
operator_output, inference_state, loop=loop
)
else:
outputs = self._run_next(
inp=operator_output,
next_step=next_step,
inference_state=inference_state,
loop=loop,
)
await outputs
operator_output = outputs.result()
if next_step == self.router.SPLIT_ROUTE:
next_step = self.router.route[self.router.JOIN_ROUTE]
continue
if isinstance(operator_output, tuple):
state_update = operator_output[-1]
operator_output = operator_output[0]
next_step = self.router.next(next_step, self.ops, operator_output)
if state_update:
inference_state.update_state(state_update)
return operator_output Maybe a little easier to reason about, but maybe not. |
||
| if operator_output is None: | ||
| raise ValueError( | ||
| f"{self.router.SPLIT_ROUTE} should appear after " | ||
| f"{self.ROUTER.START_ROUTE}" | ||
| ) | ||
|
|
||
| operator_output = await self._apply_split( | ||
| operator_output, inference_state, loop=loop | ||
| ) | ||
| next_step = self.router.route[self.router.JOIN_ROUTE] | ||
| if next_step == self.router.END_ROUTE: | ||
| return operator_output | ||
|
|
||
| if next_step == self.router.START_ROUTE: | ||
| outputs = run_func( | ||
| *args, | ||
| func=self._scheduler_group.submit, | ||
| operator=self.ops[next_step], | ||
| inference_state=inference_state, | ||
| pipeline_state=self.pipeline_state, | ||
| loop=loop, | ||
| **kwargs, | ||
| ) | ||
| await outputs | ||
| operator_output = outputs.result() | ||
|
|
||
| else: | ||
| outputs = self._run_next( | ||
| inp=operator_output, | ||
| next_step=next_step, | ||
| inference_state=inference_state, | ||
| loop=loop, | ||
| ) | ||
| await outputs | ||
| operator_output = outputs.result() | ||
|
|
||
| if isinstance(operator_output, tuple): | ||
| state_update = operator_output[-1] | ||
| operator_output = operator_output[0] | ||
dsikka marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| next_step = self.router.next(next_step, self.ops, operator_output) | ||
| if state_update: | ||
dsikka marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| inference_state.update_state(state_update) | ||
| return operator_output | ||
|
|
||
| async def _apply_split( | ||
| self, | ||
| inp: Any, | ||
| inference_state: InferenceState, | ||
| loop: Optional[asyncio.AbstractEventLoop] = None, | ||
| ): | ||
| batches, orig_batch_size = self.expand_inputs(inp, 1) | ||
|
|
||
| # Create a list of SplitRoutes, per batch size 1 | ||
|
|
@@ -168,8 +233,8 @@ def _apply_split(self, inp: Any, inference_state: InferenceState): | |
| for i in range(len(batches)) | ||
| ] | ||
|
|
||
| outputs = self._run_sub_graphs( | ||
| sub_graph_inputs=batches, sub_graphs=split_graphs | ||
| outputs = await self._run_sub_graphs( | ||
| sub_graph_inputs=batches, sub_graphs=split_graphs, loop=loop | ||
| ) | ||
| return self.condense_inputs(outputs) | ||
|
|
||
|
|
@@ -200,7 +265,9 @@ def run( | |
| f"{self.ROUTER.START_ROUTE}" | ||
| ) | ||
|
|
||
| operator_output = self._apply_split(operator_output, inference_state) | ||
| operator_output = asyncio.run( | ||
| self._apply_split(operator_output, inference_state) | ||
| ) | ||
| next_step = self.router.route[self.router.JOIN_ROUTE] | ||
| if next_step == self.router.END_ROUTE: | ||
| return operator_output | ||
|
|
@@ -232,8 +299,10 @@ def run( | |
| end=[self.router.SPLIT_ROUTE, self.router.END_ROUTE], | ||
| ) | ||
|
|
||
| operator_output = self._run_sub_graphs( | ||
| sub_graph_inputs=[operator_output], sub_graphs=[graph] | ||
| operator_output = asyncio.run( | ||
| self._run_sub_graphs( | ||
| sub_graph_inputs=[operator_output], sub_graphs=[graph] | ||
| ) | ||
| )[0] | ||
|
|
||
| inference_state = graph.inf | ||
|
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure if there's any reason for an operator to have an output of
None, but if so, you might consider using another sentinel value here and in the check below 🤷🏻 There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I guess you could also consider adding another local variable to avoid overloading the use of
`operator_output` — something like