Merged

49 commits
0e18ca7
Use pubsub instead of timeout.
elibol May 16, 2018
fc2572c
Correct status message.
elibol May 17, 2018
2e8af60
eric's feedback!
elibol May 17, 2018
b57a548
Changes from Stephanie's review.
elibol May 17, 2018
a128698
object directory changes for ray.wait.
elibol May 18, 2018
fa5c32d
Merge branch 'master' into om_pubsub
elibol May 18, 2018
0ccf46b
Merge branch 'master' into om_pubsub
elibol May 18, 2018
b02de4f
Merge branch 'om_pubsub' into om_wait
elibol May 18, 2018
f9a9e16
wait without testing or timeout=0.
elibol May 18, 2018
15b7f61
Handle remaining cases for wait.
elibol May 18, 2018
a22263b
linting
elibol May 18, 2018
8ab41f0
added tests of om wait imp.
elibol May 18, 2018
98bacfa
add local test.
elibol May 18, 2018
d518a89
Merge branch 'master' into om_wait
elibol May 21, 2018
53f33e0
plasma imp.
elibol May 24, 2018
8ef35f7
block worker as with pull.
elibol May 29, 2018
6e10f9e
local scheduler implementation of wait.
elibol May 30, 2018
9a95c65
with passing tests.
elibol May 30, 2018
aa12bd7
minor adjustments.
elibol May 30, 2018
9e1602d
Merge branch 'master' into om_wait_local_scheduler
elibol May 30, 2018
304b39c
handle return statuses.
elibol May 30, 2018
5d63bb3
enable more tests.
elibol May 30, 2018
cf1fdb2
add test for existing num_returns semantics, and maintain existing nu…
elibol May 31, 2018
531d024
move error handling to both code paths.
elibol May 31, 2018
d0d3ea4
implementing another round of feedback.
elibol May 31, 2018
62ae832
Comment on OM tests.
elibol May 31, 2018
67eef67
remove check for length zero list.
elibol Jun 1, 2018
0796a17
remove elapsed.
elibol Jun 1, 2018
dd9f0db
Preserve input/output order.
elibol Jun 1, 2018
9d4ed2b
debias local objects.
elibol Jun 1, 2018
541b88c
Merge branch 'master' into om_wait_local_scheduler
elibol Jun 1, 2018
58af739
use common helper function in object directory.
elibol Jun 1, 2018
d9ef29b
updated documentation
elibol Jun 1, 2018
fa1928b
linting.
elibol Jun 1, 2018
d41b1d0
handle return status.
elibol Jun 1, 2018
aeaab5b
simplify order preservation test + fix valgrind test error.
elibol Jun 1, 2018
048f45f
update name of final Lookup callback.
elibol Jun 2, 2018
0aa7525
Merge branch 'master' into om_wait_local_scheduler
elibol Jun 2, 2018
833939f
linting
elibol Jun 2, 2018
8e1947c
c++ style casting.
elibol Jun 2, 2018
83d04dd
linting.
elibol Jun 2, 2018
080282f
linting.
elibol Jun 2, 2018
a58f5c9
incorporate second round of feedback.
elibol Jun 5, 2018
c6d8ba5
correct python tests.
elibol Jun 5, 2018
7d8d756
test comments.
elibol Jun 5, 2018
6b6e2f3
incorporate reviews.
elibol Jun 6, 2018
3a86c93
Fixes with regression tests.
elibol Jun 6, 2018
1a99f25
update documentation.
elibol Jun 6, 2018
00eafd7
reference to avoid copy.
elibol Jun 6, 2018
8 changes: 6 additions & 2 deletions src/local_scheduler/local_scheduler_client.cc
@@ -192,12 +192,13 @@ std::pair<std::vector<ObjectID>, std::vector<ObjectID>> local_scheduler_wait(
LocalSchedulerConnection *conn,
const std::vector<ObjectID> &object_ids,
int num_returns,
int64_t timeout,
int64_t timeout_milliseconds,
bool wait_local) {
Collaborator: does wait_local work?

Contributor Author: No; I just added the piping for the argument. The back-end returns an unimplemented status if it's set to true. I have an idea of how to implement this whenever we'd like to add it.

// Write request.
flatbuffers::FlatBufferBuilder fbb;
auto message = ray::protocol::CreateWaitRequest(
fbb, to_flatbuf(fbb, object_ids), num_returns, timeout, wait_local);
fbb, to_flatbuf(fbb, object_ids), num_returns, timeout_milliseconds,
wait_local);
fbb.Finish(message);
write_message(conn->conn, ray::protocol::MessageType_WaitRequest,
fbb.GetSize(), fbb.GetBufferPointer());
@@ -206,6 +207,7 @@ std::pair<std::vector<ObjectID>, std::vector<ObjectID>> local_scheduler_wait(
int64_t reply_size;
uint8_t *reply;
Collaborator: This is a memory leak, right? reply needs to get freed somewhere (there is a malloc in read_message).

Contributor Author: Added free at the end.

read_message(conn->conn, &type, &reply_size, &reply);
Collaborator: add

RAY_CHECK(type == MessageType_WaitReply);

RAY_CHECK(type == ray::protocol::MessageType_WaitReply);
auto reply_message = flatbuffers::GetRoot<ray::protocol::WaitReply>(reply);
// Convert result.
std::pair<std::vector<ObjectID>, std::vector<ObjectID>> result;
@@ -219,5 +221,7 @@ std::pair<std::vector<ObjectID>, std::vector<ObjectID>> local_scheduler_wait(
ObjectID object_id = ObjectID::from_binary(remaining->Get(i)->str());
result.second.push_back(object_id);
}
/* Free the original message from the local scheduler. */
free(reply);
return result;
}
5 changes: 3 additions & 2 deletions src/local_scheduler/local_scheduler_client.h
@@ -175,15 +175,16 @@ void local_scheduler_set_actor_frontier(LocalSchedulerConnection *conn,
/// \param conn The connection information.
/// \param object_ids The objects to wait for.
/// \param num_returns The number of objects to wait for.
/// \param timeout The duration to wait before returning.
/// \param timeout_milliseconds Duration, in milliseconds, to wait before
/// returning.
/// \param wait_local Whether to wait for objects to appear on this node.
Contributor: This argument doesn't seem to match the current semantics for ray.wait. Is there a use case for this API?

Contributor Author: wait_local is something @robertnishihara said we were planning to add at some point, so I included the piping for it.

/// \return A pair with the first element containing the object ids that were
/// found, and the second element the objects that were not found.
std::pair<std::vector<ObjectID>, std::vector<ObjectID>> local_scheduler_wait(
LocalSchedulerConnection *conn,
const std::vector<ObjectID> &object_ids,
int num_returns,
int64_t timeout,
int64_t timeout_milliseconds,
bool wait_local);

#endif
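
To make the updated signature concrete, here is a hypothetical caller-side sketch (illustration only; wait_for_any is an invented helper, the include path is assumed, and the connection is assumed to come from the usual worker setup):

#include "local_scheduler_client.h"

// Hypothetical helper, for illustration only: return as soon as any one of
// `object_ids` is available, or after one second, whichever comes first.
std::pair<std::vector<ObjectID>, std::vector<ObjectID>> wait_for_any(
    LocalSchedulerConnection *conn, const std::vector<ObjectID> &object_ids) {
  // wait_local stays false; the local-only variant is not implemented yet
  // (see the discussion above).
  return local_scheduler_wait(conn, object_ids,
                              /*num_returns=*/1,
                              /*timeout_milliseconds=*/1000,
                              /*wait_local=*/false);
}

The returned pair follows the documented convention: the first vector holds the objects that were found, the second the ones that were not.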
12 changes: 7 additions & 5 deletions src/ray/object_manager/object_directory.cc
@@ -112,15 +112,17 @@ ray::Status ObjectDirectory::SubscribeObjectLocations(const UniqueID &callback_i
status = gcs_client_->object_table().RequestNotifications(
JobID::nil(), object_id, gcs_client_->client_table().GetLocalClientId());
}
if (listeners_[object_id].callbacks.count(callback_id) > 0) {
auto &listener_state = listeners_.find(object_id)->second;
// TODO(hme): Make this fatal after implementing Pull suppression.
if (listener_state.callbacks.count(callback_id) > 0) {
return ray::Status::OK();
}
listeners_[object_id].callbacks.emplace(callback_id, callback);
listener_state.callbacks.emplace(callback_id, callback);
// Immediately notify of found object locations.
if (!listeners_[object_id].current_object_locations.empty()) {
if (!listener_state.current_object_locations.empty()) {
std::vector<ClientID> client_id_vec(
listeners_[object_id].current_object_locations.begin(),
listeners_[object_id].current_object_locations.end());
listener_state.current_object_locations.begin(),
listener_state.current_object_locations.end());
callback(client_id_vec, object_id);
}
return status;
100 changes: 58 additions & 42 deletions src/ray/object_manager/object_manager.cc
@@ -355,51 +355,56 @@ ray::Status ObjectManager::Cancel(const ObjectID &object_id) {
return status;
}

ray::Status ObjectManager::Wait(const std::vector<ObjectID> &object_ids, int64_t wait_ms,
uint64_t num_required_objects, bool wait_local,
const WaitCallback &callback) {
ray::Status ObjectManager::Wait(const std::vector<ObjectID> &object_ids,
int64_t timeout_ms, uint64_t num_required_objects,
bool wait_local, const WaitCallback &callback) {
UniqueID wait_id = UniqueID::from_random();
Collaborator: We've had trouble with from_random being too expensive in the past.

Contributor Author: Yes, I think it was a problem for Pull, when we were polling for locations with a low timeout. It's called once for each call to Wait, so I think it is okay. Is there an alternative to from_random? How are unique ObjectIDs created?


if (wait_local) {
return ray::Status::NotImplemented("Wait for local objects is not yet implemented.");
}

RAY_CHECK(wait_ms >= 0);
RAY_CHECK(timeout_ms >= 0 || timeout_ms == -1);
RAY_CHECK(num_required_objects != 0);
Contributor: Same here, we could return [], object_ids immediately.

RAY_CHECK(num_required_objects <= object_ids.size());
if (object_ids.size() == 0) {
callback(std::vector<ObjectID>(), std::vector<ObjectID>());
}

// Initialize fields.
active_wait_requests_.emplace(wait_id, WaitState(*main_service_, wait_ms, callback));
active_wait_requests_.emplace(wait_id, WaitState(*main_service_, timeout_ms, callback));
auto &wait_state = active_wait_requests_.find(wait_id)->second;
wait_state.object_id_order = object_ids;
wait_state.wait_ms = wait_ms;
wait_state.timeout_ms = timeout_ms;
wait_state.num_required_objects = num_required_objects;
for (auto &oid : object_ids) {
if (local_objects_.count(oid) > 0) {
wait_state.found.insert(oid);
for (auto &object_id : object_ids) {
Contributor: const auto wherever you can.

if (local_objects_.count(object_id) > 0) {
wait_state.found.insert(object_id);
} else {
wait_state.remaining.insert(oid);
wait_state.remaining.insert(object_id);
}
}

if (wait_state.remaining.empty()) {
Collaborator: If we're going to include this special case here, then the condition should probably be something like wait_state.found.size() >= num_required_objects || wait_ms == 0.

That said, I think it makes sense to handle this case after firing off the LookupLocations calls.

Contributor Author: We invoke the lookup calls immediately after checking which objects are local in order to obtain current information about the locations of remote objects. This allows us to collect information about all given objects, regardless of their location.

If we only invoke WaitComplete when wait_state.found.size() >= num_required_objects || wait_ms == 0, we may invoke it before checking which remote objects are ready, which would bias the result toward locally available objects.

If we fire LookupLocations and check wait_state.found.size() >= num_required_objects || wait_ms == 0 independently, and the condition is true, the calls to LookupLocations will do nothing, because the callback to Wait will already have been invoked by the time any of the LookupLocations handlers run for the given wait_id.

Contributor: Can you add a comment to this if block detailing the reason for this condition? I had the same question as Robert when reading this code, so it'd be good to explain to the reader why you still have to do lookups for the remaining objects.

WaitComplete(wait_id);
} else {
for (auto &oid : wait_state.remaining) {
// We invoke lookup calls immediately after checking which objects are local to
// obtain current information about the location of remote objects. Thus,
// we obtain information about all given objects, regardless of their location.
// This is required to ensure we do not bias returning locally available objects
// as ready whenever Wait is invoked with a mixture of local and remote objects.
for (auto &object_id : wait_state.remaining) {
// Lookup remaining objects.
wait_state.requested_objects.insert(oid);
wait_state.requested_objects.insert(object_id);
RAY_CHECK_OK(object_directory_->LookupLocations(
oid, [this, wait_id](const std::vector<ClientID> &client_ids,
const ObjectID &object_id) {
object_id, [this, wait_id](const std::vector<ClientID> &client_ids,
const ObjectID &lookup_object_id) {
auto &wait_state = active_wait_requests_.find(wait_id)->second;
Collaborator: Couldn't the find here fail to locate the wait_id?

Contributor Author: The only way to reach the point where active_wait_requests_.erase(wait_id) is invoked is within WaitLookupComplete, which is only invoked when requested_objects.empty(), at which point no more handlers will be invoked with the same wait_id.

if (!client_ids.empty()) {
wait_state.remaining.erase(object_id);
wait_state.found.insert(object_id);
wait_state.remaining.erase(lookup_object_id);
wait_state.found.insert(lookup_object_id);
}
Collaborator: What would it mean if client_ids.empty() == true? That would mean that the object has been evicted? How do we want to handle that case?

Contributor Author: It may mean that the object has been evicted. It may also mean that no entries exist for the object. In both cases, after the round of lookups, if we need to wait for more objects, SubscribeObjectLocations will be invoked on objects for which client_ids.empty() == true (in AllWaitLookupsComplete). The handler to SubscribeObjectLocations will be invoked when !client_ids.empty().

wait_state.requested_objects.erase(object_id);
wait_state.requested_objects.erase(lookup_object_id);
if (wait_state.requested_objects.empty()) {
AllWaitLookupsComplete(wait_id);
}
@@ -412,47 +417,58 @@
void ObjectManager::AllWaitLookupsComplete(const UniqueID &wait_id) {
auto &wait_state = active_wait_requests_.find(wait_id)->second;
if (wait_state.found.size() >= wait_state.num_required_objects ||
wait_state.wait_ms == 0) {
wait_state.timeout_ms == 0) {
// Requirements already satisfied.
WaitComplete(wait_id);
} else {
for (auto &oid : wait_state.remaining) {
// Subscribe to objects in order to ensure Wait-related tests are deterministic.
for (auto &object_id : wait_state.object_id_order) {
Contributor: const auto wherever you can.

if (wait_state.remaining.count(object_id) == 0) {
Contributor: The bug that I described in the earlier comment is still an issue here. wait_state is a reference to the value at active_wait_requests_. The reference will become invalid if the entry is erased from active_wait_requests_ between iterations of this for loop, so this line can produce undefined behavior.

Contributor Author: This is the relevant check that corrects the bug you described earlier: active_wait_requests_.find(wait_id) == active_wait_requests_.end()

Contributor: Yes, I don't think it will break in that particular way anymore, but undefined behavior is still possible because of the reference to wait_state. Same underlying issue, but it will break at a different line.

Contributor Author: Nice catch. I've made the fix and added a regression test.

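To illustrate the hazard being discussed, here is a self-contained toy example (plain C++, no Ray types, invented names) showing why a cached reference into a map must not be used after an operation that may erase the entry:

#include <iostream>
#include <string>
#include <unordered_map>

std::unordered_map<int, std::string> requests = {{1, "pending"}};

// Stands in for a callback that may complete the request and erase it,
// possibly within the same call stack.
void maybe_complete(int id) { requests.erase(id); }

int main() {
  auto &state = requests.find(1)->second;  // reference into the map
  maybe_complete(1);                       // may erase the entry we refer to
  // Using `state` now would be undefined behavior. Re-checking the map first
  // (as the fix above does with active_wait_requests_.find(wait_id)) avoids it.
  if (requests.find(1) == requests.end()) {
    std::cout << "request already completed; stop using the stale reference"
              << std::endl;
    return 0;
  }
  std::cout << state << std::endl;
  return 1;
}
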
continue;
}
// Subscribe to object notifications.
wait_state.requested_objects.insert(oid);
if (active_wait_requests_.find(wait_id) == active_wait_requests_.end()) {
// This is possible if an object's location is obtained immediately,
// within the current callstack. In this case, WaitComplete has been
// invoked already, so we're done.
return;
}
wait_state.requested_objects.insert(object_id);
RAY_CHECK_OK(object_directory_->SubscribeObjectLocations(
Collaborator: Why do SubscribeObjectLocations here in addition to the LookupLocations in Wait? Wouldn't it make sense to just do one of those?

Contributor Author: The lookup calls return immediately, regardless of whether there's a known location of the object. After the lookup calls, if the conditions of the wait call are still not satisfied, we start the timer and subscribe to each remaining object. The subscription callback will be invoked only when an object's location is found. We wait until the minimum number of objects is found or the timeout is triggered.

wait_id, oid, [this, wait_id](const std::vector<ClientID> &client_ids,
const ObjectID &object_id) {
wait_id, object_id, [this, wait_id](const std::vector<ClientID> &client_ids,
const ObjectID &subscribe_object_id) {
auto object_id_wait_state = active_wait_requests_.find(wait_id);
RAY_CHECK(object_id_wait_state != active_wait_requests_.end());
auto &wait_state = active_wait_requests_.find(wait_id)->second;
Contributor: There is an issue with the way this code is currently structured, because the callback registered in SubscribeObjectLocations may now get called directly. It's possible that right now we will not actually see a bug, but that is only because of the specific ordering of calls made on the object directory by the object manager, and that seems quite brittle. Here is a scenario where I think the code would break:

  1. SubscribeObjectLocations gets called on objects A and B (e.g., for a Pull, or for a different Wait). Locations for both are cached in the object directory.
  2. Wait is called on objects A and B, with 1 object required.
  3. In the same call stack, SubscribeObjectLocations is called on object A. The cached locations are found, this callback fires, and the wait request completes and is erased from active_wait_requests_.
  4. Again, in the same call stack, SubscribeObjectLocations is called on object B. active_wait_requests_.find(wait_id) will fail (silently).

if (wait_state.remaining.count(object_id) != 0) {
wait_state.remaining.erase(object_id);
wait_state.found.insert(object_id);
}
wait_state.requested_objects.erase(object_id);
RAY_CHECK_OK(
object_directory_->UnsubscribeObjectLocations(wait_id, object_id));
RAY_CHECK(wait_state.remaining.erase(subscribe_object_id));
wait_state.found.insert(subscribe_object_id);
wait_state.requested_objects.erase(subscribe_object_id);
RAY_CHECK_OK(object_directory_->UnsubscribeObjectLocations(
wait_id, subscribe_object_id));
if (wait_state.found.size() >= wait_state.num_required_objects) {
WaitComplete(wait_id);
}
}));
}
// Set timeout.
// TODO (hme): If we need to just wait for all objects independent of time
// (i.e. infinite wait time), determine what the value of wait_ms should be and
// skip this call. WaitComplete will be invoked when all objects have locations.
wait_state.timeout_timer->async_wait(
[this, wait_id](const boost::system::error_code &error_code) {
if (error_code.value() != 0) {
return;
}
WaitComplete(wait_id);
});
if (wait_state.timeout_ms != -1) {
wait_state.timeout_timer->async_wait(
[this, wait_id](const boost::system::error_code &error_code) {
if (error_code.value() != 0) {
return;
}
WaitComplete(wait_id);
});
}
}
}

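For readers less familiar with the timer driving the timeout path above, here is a minimal standalone sketch of the Boost.Asio deadline_timer pattern (not Ray code; the 500 ms value and the printed message are invented for illustration):

#include <boost/asio.hpp>
#include <boost/date_time/posix_time/posix_time.hpp>
#include <iostream>

int main() {
  boost::asio::io_service service;
  // Arm a one-shot timer the same way WaitState does: fire once after timeout_ms.
  boost::asio::deadline_timer timer(service, boost::posix_time::milliseconds(500));
  timer.async_wait([](const boost::system::error_code &error_code) {
    if (error_code.value() != 0) {
      // A non-zero code means the timer was cancelled (e.g., the wait finished early).
      return;
    }
    std::cout << "timeout fired" << std::endl;  // stands in for WaitComplete(wait_id)
  });
  service.run();  // blocks until the handler has run
  return 0;
}
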
void ObjectManager::WaitComplete(const UniqueID &wait_id) {
auto &wait_state = active_wait_requests_.find(wait_id)->second;
// If we complete with outstanding requests, then wait_ms should be non-zero.
RAY_CHECK(!(wait_state.requested_objects.size() > 0) || wait_state.wait_ms > 0);
// If we complete with outstanding requests, then timeout_ms should be non-zero or -1
// (infinite wait time).
if (!wait_state.requested_objects.empty()) {
RAY_CHECK(wait_state.timeout_ms > 0 || wait_state.timeout_ms == -1);
}
// Unsubscribe to any objects that weren't found in the time allotted.
for (auto &object_id : wait_state.requested_objects) {
RAY_CHECK_OK(object_directory_->UnsubscribeObjectLocations(wait_id, object_id));
17 changes: 11 additions & 6 deletions src/ray/object_manager/object_manager.h
@@ -157,11 +157,13 @@ class ObjectManager : public ObjectManagerInterface {
/// \param callback Invoked when either timeout_ms is satisfied OR num_ready_objects
/// is satisfied.
/// \return Status of whether the wait successfully initiated.
ray::Status Wait(const std::vector<ObjectID> &object_ids, int64_t wait_ms,
ray::Status Wait(const std::vector<ObjectID> &object_ids, int64_t timeout_ms,
uint64_t num_required_objects, bool wait_local,
const WaitCallback &callback);

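As a hypothetical call site for this interface (illustration only; the callback is assumed to receive the found and remaining object id vectors, as the implementation in object_manager.cc suggests, and the values are invented):

// Hypothetical call site, for illustration only.
void wait_for_two(ObjectManager &object_manager,
                  const std::vector<ObjectID> &object_ids) {
  ray::Status status = object_manager.Wait(
      object_ids, /*timeout_ms=*/500, /*num_required_objects=*/2,
      /*wait_local=*/false,
      [](const std::vector<ObjectID> &found,
         const std::vector<ObjectID> &remaining) {
        // `found` holds the objects that became available within the timeout;
        // `remaining` holds the ones that did not.
      });
  RAY_CHECK_OK(status);
}
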
private:
friend class TestObjectManager;

ClientID client_id_;
const ObjectManagerConfig config_;
std::unique_ptr<ObjectDirectoryInterface> object_directory_;
@@ -197,17 +199,20 @@ class ObjectManager : public ObjectManagerInterface {
/// Cache of locally available objects.
std::unordered_map<ObjectID, ObjectInfoT> local_objects_;

/// This is used as the callback identifier in Pull for
/// SubscribeObjectLocations. We only need one identifier because we never need to
/// subscribe multiple times to the same object during Pull.
UniqueID object_directory_pull_callback_id_ = UniqueID::from_random();
Collaborator: from_random is expensive

Collaborator: oh I see this is only called once

Collaborator: why is this field needed?

Contributor Author: It's used to distinguish object directory Subscribe handlers used for Pull vs. Wait, so that Unsubscribe can unambiguously remove the correct handler whenever Pull and Wait subscribe to the same object.

Contributor: Document this field.


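A hypothetical snippet to make the callback-id mechanism concrete (illustration only; it assumes the SubscribeObjectLocations/UnsubscribeObjectLocations signatures used elsewhere in this diff, and example_subscriptions is an invented helper):

// Illustration only: two independent subscriptions to the same object,
// distinguished by callback id. Pull uses the long-lived
// object_directory_pull_callback_id_; each Wait request uses its own wait_id.
void example_subscriptions(ObjectDirectoryInterface &object_directory,
                           const UniqueID &pull_callback_id,
                           const UniqueID &wait_id, const ObjectID &object_id) {
  // Subscription made on behalf of Pull.
  RAY_CHECK_OK(object_directory.SubscribeObjectLocations(
      pull_callback_id, object_id,
      [](const std::vector<ClientID> &client_ids, const ObjectID &id) {
        // Pull's handler: request the object from one of client_ids.
      }));
  // Independent subscription made on behalf of a Wait request.
  RAY_CHECK_OK(object_directory.SubscribeObjectLocations(
      wait_id, object_id,
      [](const std::vector<ClientID> &client_ids, const ObjectID &id) {
        // Wait's handler: mark the object as found for this wait request.
      }));
  // Removing the Wait subscription leaves Pull's untouched, because the
  // callback id disambiguates the two handlers.
  RAY_CHECK_OK(object_directory.UnsubscribeObjectLocations(wait_id, object_id));
}
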
struct WaitState {
WaitState(asio::io_service &service, int64_t wait_ms, const WaitCallback &callback)
: wait_ms(wait_ms),
WaitState(asio::io_service &service, int64_t timeout_ms, const WaitCallback &callback)
: timeout_ms(timeout_ms),
timeout_timer(std::unique_ptr<boost::asio::deadline_timer>(
new boost::asio::deadline_timer(service,
boost::posix_time::milliseconds(wait_ms)))),
new boost::asio::deadline_timer(
service, boost::posix_time::milliseconds(timeout_ms)))),
callback(callback) {}
/// The period of time to wait before invoking the callback.
int64_t wait_ms;
int64_t timeout_ms;
/// The timer used whenever wait_ms > 0.
std::unique_ptr<boost::asio::deadline_timer> timeout_timer;
/// The callback invoked when WaitCallback is complete.