Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Split python/ray/tests/test_actor_retry over two files #47188

Merged
merged 4 commits into from
Aug 29, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion ci/ray_ci/core.tests.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
flaky_tests:
- windows://:metric_exporter_grpc_test
- windows://python/ray/tests:test_actor_retry
- windows://python/ray/tests:test_actor_retry1
- windows://python/ray/tests:test_actor_retry2
- windows://python/ray/tests:test_object_spilling
- windows://python/ray/tests:test_object_spilling_asan
- windows://python/ray/tests:test_object_spilling_debug_mode
Expand Down
3 changes: 2 additions & 1 deletion python/ray/tests/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -445,7 +445,8 @@ py_test_module_list(
py_test_module_list(
files = [
"test_actor.py",
"test_actor_retry.py",
"test_actor_retry1.py",
"test_actor_retry2.py",
"test_actor_failures.py",
"test_actor_resources.py",
"test_failure.py",
Expand Down
155 changes: 155 additions & 0 deletions python/ray/tests/test_actor_retry1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
import os
import sys

import pytest

import ray


class MyError(Exception):
pass


@ray.remote
class Counter:
def __init__(self) -> None:
self.count = 0

def increment(self) -> int:
c = self.count
self.count += 1
return c

def get_count(self) -> int:
return self.count


# TODO: also do work for async and threaded actors
@ray.remote(max_task_retries=3)
class TroubleMaker:
@ray.method(max_task_retries=5, retry_exceptions=[MyError])
def may_raise_n_times(self, counter, n):
"""
Raises if there were n calls before this call.
Returns the number of calls before this call, if it's > n.
"""
c = ray.get(counter.increment.remote())
print(f"may_raise_n_times, n = {n}, count = {c}")
if c < n:
print(f"method raises in {c} th call, want {n} times")
raise MyError()
return c

@ray.method(retry_exceptions=[MyError])
def raise_or_exit(self, counter, actions):
"""
Increments the counter and performs an action based on the param actions[count].
If count >= len(actions), return the count.

Note: This method doesn't set `max_task_retries`. Ray expects it to inherit
max_task_retries = 3.

@param actions: List["raise" | "exit"]
"""

c = ray.get(counter.increment.remote())
action = "return" if c >= len(actions) else actions[c]
print(f"raise_or_exit, action = {action}, count = {c}")

if action == "raise":
raise MyError()
elif action == "exit":
sys.exit(1)
else:
return c


def test_method_raise_5_times(shutdown_only):
counter = Counter.remote()
trouble_maker = TroubleMaker.remote()
assert ray.get(trouble_maker.may_raise_n_times.remote(counter, 5)) == 5
assert ray.get(counter.get_count.remote()) == 6


def test_method_raise_no_over_retry(shutdown_only):
counter = Counter.remote()
trouble_maker = TroubleMaker.remote()
with pytest.raises(MyError):
ray.get(trouble_maker.may_raise_n_times.remote(counter, 6))
assert ray.get(counter.get_count.remote()) == 6


def test_method_no_retry_without_retry_exceptions(shutdown_only):
counter = Counter.remote()
trouble_maker = TroubleMaker.remote()
with pytest.raises(MyError):
ray.get(
trouble_maker.may_raise_n_times.options(retry_exceptions=False).remote(
counter, 5
)
)
assert ray.get(counter.get_count.remote()) == 1


def test_options_takes_precedence(shutdown_only):
counter = Counter.remote()
trouble_maker = TroubleMaker.remote()
assert (
ray.get(
trouble_maker.may_raise_n_times.options(max_task_retries=10).remote(
counter, 10
)
)
== 10
)
assert ray.get(counter.get_count.remote()) == 11


def test_options_takes_precedence_no_over_retry(shutdown_only):
counter = Counter.remote()
trouble_maker = TroubleMaker.remote()

with pytest.raises(MyError):
ray.get(
trouble_maker.may_raise_n_times.options(max_task_retries=10).remote(
counter, 11
)
)
assert ray.get(counter.get_count.remote()) == 11


def test_method_exit_no_over_retry_max_restarts(shutdown_only):
"""
Even if we have enough max_task_retries, we may still raise due to max_restarts.
"""
counter = Counter.remote()
trouble_maker = TroubleMaker.options(max_restarts=1).remote()
with pytest.raises(ray.exceptions.RayActorError):
assert ray.get(
trouble_maker.raise_or_exit.options(max_task_retries=4).remote(
counter, ["raise", "exit", "exit"]
)
)
# 2 calls: 1 initial + 1 exception-retry + 1 exit-retry (then no more)
assert ray.get(counter.get_count.remote()) == 3


def test_exit_only_no_over_retry(shutdown_only):
"""
Sanity testing: only do exit-retry works
"""
counter = Counter.remote()
trouble_maker = TroubleMaker.options(max_restarts=2).remote()
ray.get(
trouble_maker.raise_or_exit.options(max_task_retries=2).remote(
counter, ["exit", "exit"]
)
) == 2
assert ray.get(counter.get_count.remote()) == 3


if __name__ == "__main__":
if os.environ.get("PARALLEL_CI"):
sys.exit(pytest.main(["-n", "auto", "--boxed", "-vs", __file__]))
else:
sys.exit(pytest.main(["-sv", __file__]))
Original file line number Diff line number Diff line change
Expand Up @@ -112,33 +112,6 @@ async def yield_or_raise(self, counter, actions):
return


def test_method_raise_5_times(shutdown_only):
counter = Counter.remote()
trouble_maker = TroubleMaker.remote()
assert ray.get(trouble_maker.may_raise_n_times.remote(counter, 5)) == 5
assert ray.get(counter.get_count.remote()) == 6


def test_method_raise_no_over_retry(shutdown_only):
counter = Counter.remote()
trouble_maker = TroubleMaker.remote()
with pytest.raises(MyError):
ray.get(trouble_maker.may_raise_n_times.remote(counter, 6))
assert ray.get(counter.get_count.remote()) == 6


def test_method_no_retry_without_retry_exceptions(shutdown_only):
counter = Counter.remote()
trouble_maker = TroubleMaker.remote()
with pytest.raises(MyError):
ray.get(
trouble_maker.may_raise_n_times.options(retry_exceptions=False).remote(
counter, 5
)
)
assert ray.get(counter.get_count.remote()) == 1


def test_generator_method_no_retry_without_retry_exceptions(shutdown_only):
counter = Counter.remote()
trouble_maker = AsyncTroubleMaker.remote()
Expand Down Expand Up @@ -226,33 +199,6 @@ def test_generator_method_does_not_over_retry(shutdown_only):
assert ray.get(counter.get_count.remote()) == 8


def test_options_takes_precedence(shutdown_only):
counter = Counter.remote()
trouble_maker = TroubleMaker.remote()
assert (
ray.get(
trouble_maker.may_raise_n_times.options(max_task_retries=10).remote(
counter, 10
)
)
== 10
)
assert ray.get(counter.get_count.remote()) == 11


def test_options_takes_precedence_no_over_retry(shutdown_only):
counter = Counter.remote()
trouble_maker = TroubleMaker.remote()

with pytest.raises(MyError):
ray.get(
trouble_maker.may_raise_n_times.options(max_task_retries=10).remote(
counter, 11
)
)
assert ray.get(counter.get_count.remote()) == 11


@pytest.mark.parametrize(
"actions",
[
Expand Down Expand Up @@ -353,22 +299,6 @@ def test_method_raise_and_exit_no_over_retry(
assert ray.get(counter.get_count.remote()) == 3


def test_method_exit_no_over_retry_max_restarts(shutdown_only):
"""
Even if we have enough max_task_retries, we may still raise due to max_restarts.
"""
counter = Counter.remote()
trouble_maker = TroubleMaker.options(max_restarts=1).remote()
with pytest.raises(ray.exceptions.RayActorError):
assert ray.get(
trouble_maker.raise_or_exit.options(max_task_retries=4).remote(
counter, ["raise", "exit", "exit"]
)
)
# 2 calls: 1 initial + 1 exception-retry + 1 exit-retry (then no more)
assert ray.get(counter.get_count.remote()) == 3


@pytest.mark.parametrize(
"is_async", [False, True], ids=lambda a: "async" if a else "sync"
)
Expand All @@ -390,20 +320,6 @@ def test_exit_only(is_async, shutdown_only):
assert ray.get(counter.get_count.remote()) == 3


def test_exit_only_no_over_retry(shutdown_only):
"""
Sanity testing: only do exit-retry works
"""
counter = Counter.remote()
trouble_maker = TroubleMaker.options(max_restarts=2).remote()
ray.get(
trouble_maker.raise_or_exit.options(max_task_retries=2).remote(
counter, ["exit", "exit"]
)
) == 2
assert ray.get(counter.get_count.remote()) == 3


if __name__ == "__main__":
if os.environ.get("PARALLEL_CI"):
sys.exit(pytest.main(["-n", "auto", "--boxed", "-vs", __file__]))
Expand Down