Fix issues with resuming async tasks awaiting a future #1469
base: rolling
Changes from 38 commits
Executor changes:

```diff
@@ -253,12 +253,19 @@ def create_task(self, callback: Callable[..., Any], *args: Any, **kwargs: Any
         :param callback: A callback to be run in the executor.
         """
         task = Task(callback, args, kwargs, executor=self)
+        self._call_task_in_next_spin(task)
+        return task
+
+    def _call_task_in_next_spin(self, task: Task) -> None:
+        """
+        Add a task to the executor to be executed in the next spin.
+
+        :param task: A task to be run in the executor.
+        """
         with self._tasks_lock:
             self._tasks.append((task, None, None))
             if self._guard:
                 self._guard.trigger()
-        # Task inherits from Future
-        return task
 
     def create_future(self) -> Future:
         """Create a Future object attached to the Executor."""
@@ -624,8 +631,6 @@ async def handler(entity: 'EntityT', gc: GuardCondition, is_shutdown: bool,
         task: Task[None] = Task(
             handler, (entity, self._guard, self._is_shutdown, self._work_tracker),
             executor=self)
-        with self._tasks_lock:
-            self._tasks.append((task, entity, node))
         return task
 
     def can_execute(self, entity: 'Entity') -> bool:
@@ -673,17 +678,17 @@ def _wait_for_ready_callbacks(
             tasks = None
             with self._tasks_lock:
                 tasks = list(self._tasks)
-            if tasks:
-                for task, entity, node in tasks:
-                    if (not task.executing() and not task.done() and
-                            (node is None or node in nodes_to_use)):
-                        yielded_work = True
-                        yield task, entity, node
-                with self._tasks_lock:
-                    # Get rid of any tasks that are done
-                    self._tasks = list(filter(lambda t_e_n: not t_e_n[0].done(), self._tasks))
-                    # Get rid of any tasks that are cancelled
-                    self._tasks = list(filter(lambda t_e_n: not t_e_n[0].cancelled(), self._tasks))
+                # Tasks that need to be executed again will add themselves back to the executor
+                self._tasks = []
+            for task_trio in tasks:
+                task, entity, node = task_trio
+                if node is None or node in nodes_to_use:
+                    yielded_work = True
+                    yield task_trio
+                else:
+                    # Asked not to execute these tasks, so don't do them yet
+                    with self._tasks_lock:
+                        self._tasks.append(task_trio)
 
             # Gather entities that can be waited on
             subscriptions: List[Subscription[Any, ]] = []
```
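For orientation, here is a minimal usage sketch of the path these hunks change. It assumes a ROS 2 environment with this branch of rclpy; the node name and the `wait_for_result` coroutine are illustrative, not part of the diff.

```python
import rclpy
from rclpy.executors import SingleThreadedExecutor

rclpy.init()
node = rclpy.create_node('await_demo')
executor = SingleThreadedExecutor()
executor.add_node(node)

future = executor.create_future()

async def wait_for_result():
    # Suspends here; with this PR the task is woken by the future's done
    # callback instead of being re-polled on every spin iteration.
    return await future

task = executor.create_task(wait_for_result)
executor.spin_once(timeout_sec=0)      # runs the coroutine up to the await
assert not task.done()

future.set_result('done')              # completing the future reschedules the task
executor.spin_until_future_complete(task, timeout_sec=1.0)
assert task.result() == 'done'

node.destroy_node()
rclpy.shutdown()
```

The same flow is exercised more thoroughly by the new tests further down.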
Future and Task changes:

```diff
@@ -66,10 +66,13 @@ def __del__(self) -> None:
                 'The following exception was never retrieved: ' + str(self._exception),
                 file=sys.stderr)
 
-    def __await__(self) -> Generator[None, None, Optional[T]]:
+    def __await__(self) -> Generator['Future[T]', None, Optional[T]]:
         # Yield if the task is not finished
-        while self._pending():
-            yield
+        if self._pending():
+            # This tells the task to suspend until the future is done
+            yield self
+        if self._pending():
+            raise RuntimeError('Future awaited a second time before it was done')
         return self.result()
 
     def _pending(self) -> bool:
@@ -298,17 +301,7 @@ def __call__(self) -> None:
             self._executing = True
 
             if inspect.iscoroutine(self._handler):
-                # Execute a coroutine
-                handler = self._handler
-                try:
-                    handler.send(None)
-                except StopIteration as e:
-                    # The coroutine finished; store the result
-                    self.set_result(e.value)
-                    self._complete_task()
-                except Exception as e:
-                    self.set_exception(e)
-                    self._complete_task()
+                self._execute_coroutine_step(self._handler)
             else:
                 # Execute a normal function
                 try:
@@ -322,6 +315,47 @@ def __call__(self) -> None:
         finally:
             self._task_lock.release()
 
+    def _execute_coroutine_step(self, coro: Coroutine[Any, Any, T]) -> None:
+        """Execute or resume a coroutine task."""
+        try:
+            result = coro.send(None)
+        except StopIteration as e:
+            # The coroutine finished; store the result
+            self.set_result(e.value)
+            self._complete_task()
+        except Exception as e:
+            # The coroutine raised; store the exception
+            self.set_exception(e)
+            self._complete_task()
+        else:
+            # The coroutine yielded; suspend the task until it is resumed
+            executor = self._executor()
+            if executor is None:
+                raise RuntimeError(
+                    'Task tried to reschedule but no executor was set: '
+                    'tasks should only be initialized through executor.create_task()')
+            elif isinstance(result, Future):
+                # Schedule the task to resume when the future is done
+                self._add_resume_callback(result, executor)
+            elif result is None:
+                # The coroutine yielded None, schedule the task to resume in the next spin
+                executor._call_task_in_next_spin(self)
+            else:
+                raise TypeError(
+                    f'Expected coroutine to yield a Future or None, got: {type(result)}')
+
+    def _add_resume_callback(self, future: Future[T], executor: 'Executor') -> None:
+        future_executor = future._executor()
+        if future_executor is None:
+            # The future is not associated with an executor yet, so associate it with ours
+            future._set_executor(executor)
+        elif future_executor is not executor:
+            raise RuntimeError('A task can only await futures associated with the same executor')
+
+        # The future is associated with the same executor, so we can resume the task directly
+        # in the done callback
+        future.add_done_callback(lambda _: self.__call__())
+
     def _complete_task(self) -> None:
         """Cleanup after task finished."""
         self._handler = None
```

Author (on the resume callback): I'm not sure I follow. At the time the coroutine yields a future, we know that this future is still pending, so we tell the future to schedule the next call of the task to the executor when the future either finishes or is cancelled. A cancelled future will just return None in the next […]
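To make the suspend/resume protocol concrete, here is an illustration-only, self-contained sketch. `DemoFuture` and `DemoTask` are stand-ins invented for this example, not rclpy classes, and the error handling and executor bookkeeping of the real `_execute_coroutine_step` / `_add_resume_callback` are deliberately omitted.

```python
# Illustration only: a coroutine awaiting a pending future yields that future,
# and the task re-runs itself from the future's done callback.
class DemoFuture:
    def __init__(self):
        self._done = False
        self._result = None
        self._callbacks = []

    def set_result(self, value):
        self._result = value
        self._done = True
        for cb in self._callbacks:
            cb(self)

    def add_done_callback(self, cb):
        if self._done:
            cb(self)
        else:
            self._callbacks.append(cb)

    def __await__(self):
        if not self._done:
            yield self              # hand the pending future to the task
        return self._result


class DemoTask:
    def __init__(self, coro):
        self._coro = coro
        self.done = False
        self.value = None

    def __call__(self):
        try:
            yielded = self._coro.send(None)
        except StopIteration as e:
            self.done = True
            self.value = e.value
        else:
            # `yielded` is the pending future; run this task again when it completes
            yielded.add_done_callback(lambda _: self())


async def wait_for(fut):
    return await fut


fut = DemoFuture()
task = DemoTask(wait_for(fut))
task()                      # first step: suspends on the pending future
assert not task.done
fut.set_result('hello')     # the done callback resumes the task immediately
assert task.done and task.value == 'hello'
```

The key point is that the pending future itself is what the coroutine yields, so the task knows exactly which completion event should wake it.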
Test changes:

```diff
@@ -297,6 +297,40 @@ async def coroutine() -> str:
                 self.assertTrue(future.done())
                 self.assertEqual('Sentinel Result', future.result())
 
+    def test_create_task_coroutine_yield(self) -> None:
+        self.assertIsNotNone(self.node.handle)
+        for cls in [SingleThreadedExecutor, EventsExecutor]:
+            with self.subTest(cls=cls):
+                executor = cls(context=self.context)
+                executor.add_node(self.node)
+
+                called1 = False
+                called2 = False
+
+                async def coroutine() -> str:
+                    nonlocal called1
+                    nonlocal called2
+                    called1 = True
+                    await asyncio.sleep(0)
+                    called2 = True
+                    return 'Sentinel Result'
+
+                future = executor.create_task(coroutine)
+                self.assertFalse(future.done())
+                self.assertFalse(called1)
+                self.assertFalse(called2)
+
+                executor.spin_once(timeout_sec=0)
+                self.assertFalse(future.done())
+                self.assertTrue(called1)
+                self.assertFalse(called2)
+
+                executor.spin_once(timeout_sec=1)
+                self.assertTrue(future.done())
+                self.assertTrue(called1)
+                self.assertTrue(called2)
+                self.assertEqual('Sentinel Result', future.result())
+
     def test_create_task_coroutine_cancel(self) -> None:
         self.assertIsNotNone(self.node.handle)
         for cls in [SingleThreadedExecutor, EventsExecutor]:
@@ -319,6 +353,38 @@ async def coroutine() -> str:
                 self.assertTrue(future.cancelled())
                 self.assertEqual(None, future.result())
 
+    def test_create_task_coroutine_wake_from_another_thread(self) -> None:
+        self.assertIsNotNone(self.node.handle)
+
+        for cls in [SingleThreadedExecutor, MultiThreadedExecutor, EventsExecutor]:
+            with self.subTest(cls=cls):
+                executor = cls(context=self.context)
+                thread_future = executor.create_future()
+
+                async def coroutine():
+                    await thread_future
+
+                def future_thread():
+                    threading.Event().wait(0.1)  # Simulate some work
+                    thread_future.set_result(None)
+
+                t = threading.Thread(target=future_thread)
+
+                coroutine_future = executor.create_task(coroutine)
+
+                start_time = time.monotonic()
+
+                t.start()
+                executor.spin_until_future_complete(coroutine_future, timeout_sec=1.0)
+
+                end_time = time.monotonic()
+
+                self.assertTrue(coroutine_future.done())
+
+                # The coroutine should take at least 0.1 seconds to complete because it waits for
+                # the thread to set the future but nowhere near the 1 second timeout
+                assert 0.1 <= end_time - start_time < 0.2
+
     def test_create_task_normal_function(self) -> None:
         self.assertIsNotNone(self.node.handle)
         for cls in [SingleThreadedExecutor, EventsExecutor]:
```
Review discussion:
I now realize that we still have some topics to discuss.
We are changing behavior here: previously, the executor always yielded node and entity, even for blocked tasks. Now, any task that is resumed using _call_task_in_next_spin will yield None for entity and node from the second time onwards.
SingleThreadedExecutor and MultiThreadedExecutor ignore these yielded values, but the node argument is used to filter tasks. I believe that, unlike before, the new code will keep running tasks created by a removed node, for example.
We should discuss this behavior change further before proceeding.
Do you see any way this can be fixed?
Yes, but that depends on the intended behavior.
If we are okay with the "set and forget" approach, no change is required.
Otherwise, for each node in the executor we can hold a set of running tasks.
When a node is removed from the executor, all tasks related to it should be cancelled.
It's also possible to initialize a task with the node that it originated from as a property instead of this task_trio.
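A hypothetical sketch of the per-node bookkeeping proposed above; `_tasks_by_node`, `_register_task`, and the `remove_node` hook are illustrative names, not existing rclpy attributes.

```python
from collections import defaultdict

class ExecutorSketch:
    def __init__(self):
        # node -> set of live tasks started on behalf of that node
        self._tasks_by_node = defaultdict(set)

    def _register_task(self, task, node):
        if node is not None:
            self._tasks_by_node[node].add(task)
            # Drop the bookkeeping entry once the task finishes
            task.add_done_callback(
                lambda _: self._tasks_by_node[node].discard(task))

    def remove_node(self, node):
        # Cancel everything the removed node started so it is not resumed later
        for task in self._tasks_by_node.pop(node, set()):
            task.cancel()
```

Storing the originating node directly on the Task, as also suggested, would serve the same purpose without a separate mapping.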
I just realized that holding tuples of (task, entity, node) does not really make sense now, as the entity and node information is never added after I removed it from _make_handler. Maybe we should find some other way to track which node created the task?
@nadavelkabets any ideas?
Exactly. I would like to discuss this topic with core maintainers to get their opinion.
I'm not sure if we should avoid changing this behavior or if it is a welcome change.