From 56d3e7cf9ae9d8e963518a02664c2b4c8d650362 Mon Sep 17 00:00:00 2001 From: Yuhong Guo Date: Sat, 9 Feb 2019 19:49:31 +0800 Subject: [PATCH 1/4] Enlarge waiting time in test_actors_and_tasks_with_gpus --- test/actor_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/actor_test.py b/test/actor_test.py index 8a4d2ea3f992..15feef997c6f 100644 --- a/test/actor_test.py +++ b/test/actor_test.py @@ -1074,7 +1074,7 @@ def check_intervals_non_overlapping(list_of_intervals): @ray.remote(num_gpus=1) def f1(): t1 = time.monotonic() - time.sleep(0.2) + time.sleep(0.4) t2 = time.monotonic() gpu_ids = ray.get_gpu_ids() assert len(gpu_ids) == 1 @@ -1085,7 +1085,7 @@ def f1(): @ray.remote(num_gpus=2) def f2(): t1 = time.monotonic() - time.sleep(0.2) + time.sleep(0.4) t2 = time.monotonic() gpu_ids = ray.get_gpu_ids() assert len(gpu_ids) == 2 From 8ddbf3b75faeaed0f72f0f6cfcdad7cda644f9e4 Mon Sep 17 00:00:00 2001 From: Yuhong Guo Date: Sun, 10 Feb 2019 15:34:30 +0800 Subject: [PATCH 2/4] Add log to object_manager_test.cc --- src/ray/object_manager/test/object_manager_test.cc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/ray/object_manager/test/object_manager_test.cc b/src/ray/object_manager/test/object_manager_test.cc index 904f1ed2a83c..3c2f20573533 100644 --- a/src/ray/object_manager/test/object_manager_test.cc +++ b/src/ray/object_manager/test/object_manager_test.cc @@ -419,17 +419,23 @@ class TestObjectManager : public TestObjectManagerBase { case 0: { // Ensure timeout_ms = 0 returns expected number of found and remaining // objects. + RAY_LOG(INFO) << "found.size()=" << found.size() + << ", required_objects=" << required_objects; ASSERT_TRUE(found.size() <= required_objects); ASSERT_TRUE(static_cast(found.size() + remaining.size()) == num_objects); NextWaitTest(); } break; case 1: { // Ensure lookup succeeds as expected when timeout_ms = 1000. + RAY_LOG(INFO) << "found.size()=" << found.size() + << ", required_objects=" << required_objects; ASSERT_TRUE(found.size() >= required_objects); ASSERT_TRUE(static_cast(found.size() + remaining.size()) == num_objects); NextWaitTest(); } break; case 2: { + RAY_LOG(INFO) << "found.size()=" << found.size() + << ", required_objects=" << required_objects; // Ensure lookup succeeds as expected when objects are local. ASSERT_TRUE(found.size() >= required_objects); ASSERT_TRUE(static_cast(found.size() + remaining.size()) == num_objects); From 76ff83a1115868674285db3f5e17e2cbec6f21fa Mon Sep 17 00:00:00 2001 From: Yuhong Guo Date: Sun, 10 Feb 2019 17:41:35 +0800 Subject: [PATCH 3/4] Increase time --- src/ray/object_manager/test/object_manager_test.cc | 14 ++++---------- test/actor_test.py | 2 +- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/src/ray/object_manager/test/object_manager_test.cc b/src/ray/object_manager/test/object_manager_test.cc index 3c2f20573533..954afab08312 100644 --- a/src/ray/object_manager/test/object_manager_test.cc +++ b/src/ray/object_manager/test/object_manager_test.cc @@ -352,7 +352,7 @@ class TestObjectManager : public TestObjectManagerBase { case 2: { // Generate objects locally to ensure local object code-path works properly. // Out of 5 objects, we expect 3 ready objects and 2 remaining objects. - TestWait(100, 5, 3, 1000, false, /*test_local=*/true); + TestWait(100, 5, 3, 2000, false, /*test_local=*/true); } break; case 3: { // Wait on an object that's never registered with GCS to ensure timeout works @@ -391,9 +391,9 @@ class TestObjectManager : public TestObjectManagerBase { const std::vector &remaining) { int64_t elapsed = (boost::posix_time::second_clock::local_time() - start_time) .total_milliseconds(); - RAY_LOG(DEBUG) << "elapsed " << elapsed; - RAY_LOG(DEBUG) << "found " << found.size(); - RAY_LOG(DEBUG) << "remaining " << remaining.size(); + RAY_LOG(INFO) << "elapsed " << elapsed; + RAY_LOG(INFO) << "found " << found.size(); + RAY_LOG(INFO) << "remaining " << remaining.size(); // Ensure object order is preserved for all invocations. uint j = 0; @@ -419,23 +419,17 @@ class TestObjectManager : public TestObjectManagerBase { case 0: { // Ensure timeout_ms = 0 returns expected number of found and remaining // objects. - RAY_LOG(INFO) << "found.size()=" << found.size() - << ", required_objects=" << required_objects; ASSERT_TRUE(found.size() <= required_objects); ASSERT_TRUE(static_cast(found.size() + remaining.size()) == num_objects); NextWaitTest(); } break; case 1: { // Ensure lookup succeeds as expected when timeout_ms = 1000. - RAY_LOG(INFO) << "found.size()=" << found.size() - << ", required_objects=" << required_objects; ASSERT_TRUE(found.size() >= required_objects); ASSERT_TRUE(static_cast(found.size() + remaining.size()) == num_objects); NextWaitTest(); } break; case 2: { - RAY_LOG(INFO) << "found.size()=" << found.size() - << ", required_objects=" << required_objects; // Ensure lookup succeeds as expected when objects are local. ASSERT_TRUE(found.size() >= required_objects); ASSERT_TRUE(static_cast(found.size() + remaining.size()) == num_objects); diff --git a/test/actor_test.py b/test/actor_test.py index 15feef997c6f..230a5db30546 100644 --- a/test/actor_test.py +++ b/test/actor_test.py @@ -1186,7 +1186,7 @@ def test_actors_and_tasks_with_gpus_version_two(shutdown_only): @ray.remote(num_gpus=1) def f(): - time.sleep(4) + time.sleep(5) gpu_ids = ray.get_gpu_ids() assert len(gpu_ids) == 1 return gpu_ids[0] From 2eccbb441c87bef44606052327f9def0db18b52f Mon Sep 17 00:00:00 2001 From: Yuhong Guo Date: Sun, 10 Feb 2019 19:35:51 +0800 Subject: [PATCH 4/4] use _random_string to replace random_string, increase object size in object_manager_test.cc --- python/ray/tune/trial.py | 4 ++-- python/ray/worker.py | 12 ++++++------ .../object_manager/test/object_manager_test.cc | 16 ++++++++-------- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/python/ray/tune/trial.py b/python/ray/tune/trial.py index 3ef0b5027a46..7f32d3941ed8 100644 --- a/python/ray/tune/trial.py +++ b/python/ray/tune/trial.py @@ -26,7 +26,7 @@ import ray.tune.registry from ray.tune.result import (DEFAULT_RESULTS_DIR, DONE, HOSTNAME, PID, TIME_TOTAL_S, TRAINING_ITERATION, TIMESTEPS_TOTAL) -from ray.utils import random_string, binary_to_hex, hex_to_binary +from ray.utils import _random_string, binary_to_hex, hex_to_binary DEBUG_PRINT_INTERVAL = 5 MAX_LEN_IDENTIFIER = 130 @@ -311,7 +311,7 @@ def _registration_check(cls, trainable_name): @classmethod def generate_id(cls): - return binary_to_hex(random_string())[:8] + return binary_to_hex(_random_string())[:8] def init_logger(self): """Init logger.""" diff --git a/python/ray/worker.py b/python/ray/worker.py index fb3ad9025c38..f66616b3800c 100644 --- a/python/ray/worker.py +++ b/python/ray/worker.py @@ -38,7 +38,7 @@ from ray import profiling from ray.function_manager import (FunctionActorManager, FunctionDescriptor) import ray.parameter -from ray.utils import (check_oversized_pickle, is_cython, random_string, +from ray.utils import (check_oversized_pickle, is_cython, _random_string, thread_safe_client, setup_logger) SCRIPT_MODE = 0 @@ -186,7 +186,7 @@ def task_context(self): # to the current task ID may not be correct. Generate a # random task ID so that the backend can differentiate # between different threads. - self._task_context.current_task_id = TaskID(random_string()) + self._task_context.current_task_id = TaskID(_random_string()) if getattr(self, '_multithreading_warned', False) is not True: logger.warning( "Calling ray.get or ray.wait in a separate thread " @@ -1753,13 +1753,13 @@ def connect(info, # Initialize some fields. if mode is WORKER_MODE: - worker.worker_id = random_string() + worker.worker_id = _random_string() if setproctitle: setproctitle.setproctitle("ray_worker") else: # This is the code path of driver mode. if driver_id is None: - driver_id = DriverID(random_string()) + driver_id = DriverID(_random_string()) if not isinstance(driver_id, DriverID): raise Exception("The type of given driver id must be DriverID.") @@ -1911,7 +1911,7 @@ def connect(info, function_descriptor.get_function_descriptor_list(), [], # arguments. 0, # num_returns. - TaskID(random_string()), # parent_task_id. + TaskID(_random_string()), # parent_task_id. 0, # parent_counter. ActorID.nil(), # actor_creation_id. ObjectID.nil(), # actor_creation_dummy_object_id. @@ -2164,7 +2164,7 @@ def register_custom_serializer(cls, else: # In this case, the class ID only needs to be meaningful on this # worker and not across workers. - class_id = random_string() + class_id = _random_string() # Make sure class_id is a string. class_id = ray.utils.binary_to_hex(class_id) diff --git a/src/ray/object_manager/test/object_manager_test.cc b/src/ray/object_manager/test/object_manager_test.cc index 954afab08312..a6d79d608e97 100644 --- a/src/ray/object_manager/test/object_manager_test.cc +++ b/src/ray/object_manager/test/object_manager_test.cc @@ -342,27 +342,27 @@ class TestObjectManager : public TestObjectManagerBase { case 0: { // Ensure timeout_ms = 0 is handled correctly. // Out of 5 objects, we expect 3 ready objects and 2 remaining objects. - TestWait(100, 5, 3, /*timeout_ms=*/0, false, false); + TestWait(600, 5, 3, /*timeout_ms=*/0, false, false); } break; case 1: { // Ensure timeout_ms = 1000 is handled correctly. // Out of 5 objects, we expect 3 ready objects and 2 remaining objects. - TestWait(100, 5, 3, /*timeout_ms=*/1000, false, false); + TestWait(600, 5, 3, /*timeout_ms=*/1000, false, false); } break; case 2: { // Generate objects locally to ensure local object code-path works properly. // Out of 5 objects, we expect 3 ready objects and 2 remaining objects. - TestWait(100, 5, 3, 2000, false, /*test_local=*/true); + TestWait(600, 5, 3, 1000, false, /*test_local=*/true); } break; case 3: { // Wait on an object that's never registered with GCS to ensure timeout works // properly. - TestWait(100, /*num_objects=*/5, /*required_objects=*/6, 1000, + TestWait(600, /*num_objects=*/5, /*required_objects=*/6, 1000, /*include_nonexistent=*/true, false); } break; case 4: { // Ensure infinite time code-path works properly. - TestWait(100, 5, 5, /*timeout_ms=*/-1, false, false); + TestWait(600, 5, 5, /*timeout_ms=*/-1, false, false); } break; } } @@ -391,9 +391,9 @@ class TestObjectManager : public TestObjectManagerBase { const std::vector &remaining) { int64_t elapsed = (boost::posix_time::second_clock::local_time() - start_time) .total_milliseconds(); - RAY_LOG(INFO) << "elapsed " << elapsed; - RAY_LOG(INFO) << "found " << found.size(); - RAY_LOG(INFO) << "remaining " << remaining.size(); + RAY_LOG(DEBUG) << "elapsed " << elapsed; + RAY_LOG(DEBUG) << "found " << found.size(); + RAY_LOG(DEBUG) << "remaining " << remaining.size(); // Ensure object order is preserved for all invocations. uint j = 0;