diff --git a/rclcpp_action/src/server.cpp b/rclcpp_action/src/server.cpp index bcf89a6d96..f21133d3ff 100644 --- a/rclcpp_action/src/server.cpp +++ b/rclcpp_action/src/server.cpp @@ -45,7 +45,7 @@ class ServerBaseImpl { } - // Lock everything except user callbacks + // Lock for action_server_ std::recursive_mutex action_server_reentrant_mutex_; std::shared_ptr action_server_; @@ -58,10 +58,13 @@ class ServerBaseImpl size_t num_services_ = 0; size_t num_guard_conditions_ = 0; - bool goal_request_ready_ = false; - bool cancel_request_ready_ = false; - bool result_request_ready_ = false; - bool goal_expired_ = false; + std::atomic goal_request_ready_{false}; + std::atomic cancel_request_ready_{false}; + std::atomic result_request_ready_{false}; + std::atomic goal_expired_{false}; + + // Lock for unordered_maps + std::recursive_mutex unordered_map_mutex_; // Results to be kept until the goal expires after reaching a terminal state std::unordered_map> goal_results_; @@ -171,29 +174,41 @@ ServerBase::add_to_wait_set(rcl_wait_set_t * wait_set) bool ServerBase::is_ready(rcl_wait_set_t * wait_set) { - std::lock_guard lock(pimpl_->action_server_reentrant_mutex_); - rcl_ret_t ret = rcl_action_server_wait_set_get_entities_ready( - wait_set, - pimpl_->action_server_.get(), - &pimpl_->goal_request_ready_, - &pimpl_->cancel_request_ready_, - &pimpl_->result_request_ready_, - &pimpl_->goal_expired_); + bool goal_request_ready; + bool cancel_request_ready; + bool result_request_ready; + bool goal_expired; + rcl_ret_t ret; + { + std::lock_guard lock(pimpl_->action_server_reentrant_mutex_); + ret = rcl_action_server_wait_set_get_entities_ready( + wait_set, + pimpl_->action_server_.get(), + &goal_request_ready, + &cancel_request_ready, + &result_request_ready, + &goal_expired); + } + + pimpl_->goal_request_ready_ = goal_request_ready; + pimpl_->cancel_request_ready_ = cancel_request_ready; + pimpl_->result_request_ready_ = result_request_ready; + pimpl_->goal_expired_ = goal_expired; if (RCL_RET_OK != ret) { rclcpp::exceptions::throw_from_rcl_error(ret); } - return pimpl_->goal_request_ready_ || - pimpl_->cancel_request_ready_ || - pimpl_->result_request_ready_ || - pimpl_->goal_expired_; + return pimpl_->goal_request_ready_.load() || + pimpl_->cancel_request_ready_.load() || + pimpl_->result_request_ready_.load() || + pimpl_->goal_expired_.load(); } std::shared_ptr ServerBase::take_data() { - if (pimpl_->goal_request_ready_) { + if (pimpl_->goal_request_ready_.load()) { rcl_ret_t ret; rcl_action_goal_info_t goal_info = rcl_action_get_zero_initialized_goal_info(); rmw_request_id_t request_header; @@ -212,7 +227,7 @@ ServerBase::take_data() ret, goal_info, request_header, message)); - } else if (pimpl_->cancel_request_ready_) { + } else if (pimpl_->cancel_request_ready_.load()) { rcl_ret_t ret; rmw_request_id_t request_header; @@ -229,7 +244,7 @@ ServerBase::take_data() std::make_shared , rmw_request_id_t>>(ret, request, request_header)); - } else if (pimpl_->result_request_ready_) { + } else if (pimpl_->result_request_ready_.load()) { rcl_ret_t ret; // Get the result request message rmw_request_id_t request_header; @@ -241,7 +256,7 @@ ServerBase::take_data() return std::static_pointer_cast( std::make_shared, rmw_request_id_t>>( ret, result_request, request_header)); - } else if (pimpl_->goal_expired_) { + } else if (pimpl_->goal_expired_.load()) { return nullptr; } else { throw std::runtime_error("Taking data from action server but nothing is ready"); @@ -251,17 +266,17 @@ ServerBase::take_data() void ServerBase::execute(std::shared_ptr & data) { - if (!data && !pimpl_->goal_expired_) { + if (!data && !pimpl_->goal_expired_.load()) { throw std::runtime_error("'data' is empty"); } - if (pimpl_->goal_request_ready_) { + if (pimpl_->goal_request_ready_.load()) { execute_goal_request_received(data); - } else if (pimpl_->cancel_request_ready_) { + } else if (pimpl_->cancel_request_ready_.load()) { execute_cancel_request_received(data); - } else if (pimpl_->result_request_ready_) { + } else if (pimpl_->result_request_ready_.load()) { execute_result_request_received(data); - } else if (pimpl_->goal_expired_) { + } else if (pimpl_->goal_expired_.load()) { execute_check_expired_goals(); } else { throw std::runtime_error("Executing action server but nothing is ready"); @@ -286,10 +301,10 @@ ServerBase::execute_goal_request_received(std::shared_ptr & data) rmw_request_id_t request_header = std::get<2>(*shared_ptr); std::shared_ptr message = std::get<3>(*shared_ptr); - - std::lock_guard lock(pimpl_->action_server_reentrant_mutex_); - - pimpl_->goal_request_ready_ = false; + bool expected = true; + if (!pimpl_->goal_request_ready_.compare_exchange_strong(expected, false)) { + return; + } GoalUUID uuid = get_goal_id_from_goal_request(message.get()); convert(uuid, &goal_info); @@ -297,10 +312,13 @@ ServerBase::execute_goal_request_received(std::shared_ptr & data) // Call user's callback, getting the user's response and a ros message to send back auto response_pair = call_handle_goal_callback(uuid, message); - ret = rcl_action_send_goal_response( - pimpl_->action_server_.get(), - &request_header, - response_pair.second.get()); + { + std::lock_guard lock(pimpl_->action_server_reentrant_mutex_); + ret = rcl_action_send_goal_response( + pimpl_->action_server_.get(), + &request_header, + response_pair.second.get()); + } if (RCL_RET_OK != ret) { rclcpp::exceptions::throw_from_rcl_error(ret); @@ -325,7 +343,10 @@ ServerBase::execute_goal_request_received(std::shared_ptr & data) } }; rcl_action_goal_handle_t * rcl_handle; - rcl_handle = rcl_action_accept_new_goal(pimpl_->action_server_.get(), &goal_info); + { + std::lock_guard lock(pimpl_->action_server_reentrant_mutex_); + rcl_handle = rcl_action_accept_new_goal(pimpl_->action_server_.get(), &goal_info); + } if (!rcl_handle) { throw std::runtime_error("Failed to accept new goal\n"); } @@ -334,7 +355,10 @@ ServerBase::execute_goal_request_received(std::shared_ptr & data) // Copy out goal handle since action server storage disappears when it is fini'd *handle = *rcl_handle; - pimpl_->goal_handles_[uuid] = handle; + { + std::lock_guard lock(pimpl_->unordered_map_mutex_); + pimpl_->goal_handles_[uuid] = handle; + } if (GoalResponse::ACCEPT_AND_EXECUTE == status) { // Change status to executing @@ -359,7 +383,6 @@ ServerBase::execute_cancel_request_received(std::shared_ptr & data) , rmw_request_id_t>>(data); auto ret = std::get<0>(*shared_ptr); - std::lock_guard lock(pimpl_->action_server_reentrant_mutex_); if (RCL_RET_ACTION_SERVER_TAKE_FAILED == ret) { // Ignore take failure because connext fails if it receives a sample without valid data. // This happens when a client shuts down and connext receives a sample saying the client is @@ -380,10 +403,14 @@ ServerBase::execute_cancel_request_received(std::shared_ptr & data) // Get a list of goal info that should be attempted to be cancelled rcl_action_cancel_response_t cancel_response = rcl_action_get_zero_initialized_cancel_response(); - ret = rcl_action_process_cancel_request( - pimpl_->action_server_.get(), - &cancel_request, - &cancel_response); + { + std::lock_guard lock(pimpl_->action_server_reentrant_mutex_); + ret = rcl_action_process_cancel_request( + pimpl_->action_server_.get(), + &cancel_request, + &cancel_response); + } + if (RCL_RET_OK != ret) { rclcpp::exceptions::throw_from_rcl_error(ret); } @@ -426,8 +453,12 @@ ServerBase::execute_cancel_request_received(std::shared_ptr & data) publish_status(); } - ret = rcl_action_send_cancel_response( - pimpl_->action_server_.get(), &request_header, response.get()); + { + std::lock_guard lock(pimpl_->action_server_reentrant_mutex_); + ret = rcl_action_send_cancel_response( + pimpl_->action_server_.get(), &request_header, response.get()); + } + if (RCL_RET_OK != ret) { rclcpp::exceptions::throw_from_rcl_error(ret); } @@ -440,7 +471,6 @@ ServerBase::execute_result_request_received(std::shared_ptr & data) auto shared_ptr = std::static_pointer_cast , rmw_request_id_t>>(data); auto ret = std::get<0>(*shared_ptr); - std::lock_guard lock(pimpl_->action_server_reentrant_mutex_); if (RCL_RET_ACTION_SERVER_TAKE_FAILED == ret) { // Ignore take failure because connext fails if it receives a sample without valid data. // This happens when a client shuts down and connext receives a sample saying the client is @@ -460,7 +490,10 @@ ServerBase::execute_result_request_received(std::shared_ptr & data) rcl_action_goal_info_t goal_info; convert(uuid, &goal_info); bool goal_exists; - goal_exists = rcl_action_server_goal_exists(pimpl_->action_server_.get(), &goal_info); + { + std::lock_guard lock(pimpl_->action_server_reentrant_mutex_); + goal_exists = rcl_action_server_goal_exists(pimpl_->action_server_.get(), &goal_info); + } if (!goal_exists) { // Goal does not exists result_response = create_result_response(action_msgs::msg::GoalStatus::STATUS_UNKNOWN); @@ -474,6 +507,7 @@ ServerBase::execute_result_request_received(std::shared_ptr & data) if (result_response) { // Send the result now + std::lock_guard lock(pimpl_->action_server_reentrant_mutex_); rcl_ret_t rcl_ret = rcl_action_send_result_response( pimpl_->action_server_.get(), &request_header, result_response.get()); if (RCL_RET_OK != rcl_ret) { @@ -481,6 +515,7 @@ ServerBase::execute_result_request_received(std::shared_ptr & data) } } else { // Store the request so it can be responded to later + std::lock_guard lock(pimpl_->unordered_map_mutex_); pimpl_->result_requests_[uuid].push_back(request_header); } data.reset(); @@ -495,9 +530,11 @@ ServerBase::execute_check_expired_goals() // Loop in case more than 1 goal expired while (num_expired > 0u) { - std::lock_guard lock(pimpl_->action_server_reentrant_mutex_); rcl_ret_t ret; - ret = rcl_action_expire_goals(pimpl_->action_server_.get(), expired_goals, 1, &num_expired); + { + std::lock_guard lock(pimpl_->action_server_reentrant_mutex_); + ret = rcl_action_expire_goals(pimpl_->action_server_.get(), expired_goals, 1, &num_expired); + } if (RCL_RET_OK != ret) { rclcpp::exceptions::throw_from_rcl_error(ret); } else if (num_expired) { @@ -505,6 +542,7 @@ ServerBase::execute_check_expired_goals() GoalUUID uuid; convert(expired_goals[0], &uuid); RCLCPP_DEBUG(pimpl_->logger_, "Expired goal %s", to_string(uuid).c_str()); + std::lock_guard lock(pimpl_->unordered_map_mutex_); pimpl_->goal_results_.erase(uuid); pimpl_->result_requests_.erase(uuid); pimpl_->goal_handles_.erase(uuid); @@ -577,20 +615,26 @@ ServerBase::publish_result(const GoalUUID & uuid, std::shared_ptr result_m // Check that the goal exists rcl_action_goal_info_t goal_info; convert(uuid, &goal_info); - std::lock_guard lock(pimpl_->action_server_reentrant_mutex_); bool goal_exists; - goal_exists = rcl_action_server_goal_exists(pimpl_->action_server_.get(), &goal_info); + { + std::lock_guard lock(pimpl_->action_server_reentrant_mutex_); + goal_exists = rcl_action_server_goal_exists(pimpl_->action_server_.get(), &goal_info); + } if (!goal_exists) { throw std::runtime_error("Asked to publish result for goal that does not exist"); } - pimpl_->goal_results_[uuid] = result_msg; + { + std::lock_guard lock(pimpl_->unordered_map_mutex_); + pimpl_->goal_results_[uuid] = result_msg; + } // if there are clients who already asked for the result, send it to them auto iter = pimpl_->result_requests_.find(uuid); if (iter != pimpl_->result_requests_.end()) { for (auto & request_header : iter->second) { + std::lock_guard lock(pimpl_->action_server_reentrant_mutex_); rcl_ret_t ret = rcl_action_send_result_response( pimpl_->action_server_.get(), &request_header, result_msg.get()); if (RCL_RET_OK != ret) { diff --git a/rclcpp_action/test/test_server.cpp b/rclcpp_action/test/test_server.cpp index bd840b4168..5fc2b7ee29 100644 --- a/rclcpp_action/test/test_server.cpp +++ b/rclcpp_action/test/test_server.cpp @@ -1221,3 +1221,92 @@ TEST_F(TestCancelRequestServer, publish_status_send_cancel_response_errors) EXPECT_THROW(SendClientCancelRequest(), std::runtime_error); } + +class TestDeadlockServer : public TestServer +{ +public: + void SetUp() + { + node_ = std::make_shared("goal_request", "/rclcpp_action/goal_request"); + uuid1_ = {{1, 2, 3, 4, 5, 6, 70, 80, 9, 1, 11, 120, 13, 140, 15, 160}}; + uuid2_ = {{2, 2, 3, 4, 5, 6, 70, 80, 9, 1, 11, 120, 13, 140, 15, 160}}; + action_server_ = rclcpp_action::create_server( + node_, "fibonacci", + [this](const GoalUUID &, std::shared_ptr) { + // instead of making a deadlock, check if it can acquire the lock in a second + std::unique_lock lock(server_mutex_, std::defer_lock); + this->TryLockFor(lock, std::chrono::milliseconds(1000)); + return rclcpp_action::GoalResponse::ACCEPT_AND_EXECUTE; + }, + [this](std::shared_ptr) { + // instead of making a deadlock, check if it can acquire the lock in a second + std::unique_lock lock(server_mutex_, std::defer_lock); + this->TryLockFor(lock, std::chrono::milliseconds(1000)); + return rclcpp_action::CancelResponse::ACCEPT; + }, + [this](std::shared_ptr handle) { + // instead of making a deadlock, check if it can acquire the lock in a second + std::unique_lock lock(server_mutex_, std::defer_lock); + this->TryLockFor(lock, std::chrono::milliseconds(1000)); + goal_handle_ = handle; + }); + } + + void GoalSucceeded() + { + std::lock_guard lock(server_mutex_); + rclcpp::sleep_for(std::chrono::milliseconds(100)); + auto result = std::make_shared(); + result->sequence = {5, 8, 13, 21}; + goal_handle_->succeed(result); + } + + void GoalCanceled() + { + std::lock_guard lock(server_mutex_); + rclcpp::sleep_for(std::chrono::milliseconds(100)); + auto result = std::make_shared(); + goal_handle_->canceled(result); + } + + void TryLockFor( + std::unique_lock & lock, + std::chrono::milliseconds timeout + ) + { + ASSERT_TRUE(lock.try_lock_for(timeout)); + } + +protected: + std::recursive_timed_mutex server_mutex_; + GoalUUID uuid1_, uuid2_; + std::shared_ptr node_; + std::shared_ptr> action_server_; + + using GoalHandle = rclcpp_action::ServerGoalHandle; + std::shared_ptr goal_handle_; +}; + +TEST_F(TestDeadlockServer, deadlock_while_succeed) +{ + send_goal_request(node_, uuid1_); + // this will lock wrapper's mutex and intentionally wait 100ms for calling succeed + // to try to acquire the lock of rclcpp_action mutex + std::thread t(&TestDeadlockServer::GoalSucceeded, this); + // after the wrapper's mutex is locked and before succeed is called + rclcpp::sleep_for(std::chrono::milliseconds(50)); + // call next goal request to intentionally reproduce deadlock + // this first locks rclcpp_action mutex and then call callback to lock wrapper's mutex + send_goal_request(node_, uuid2_); + t.join(); +} + +TEST_F(TestDeadlockServer, deadlock_while_canceled) +{ + send_goal_request(node_, uuid1_); + send_cancel_request(node_, uuid1_); + std::thread t(&TestDeadlockServer::GoalCanceled, this); + rclcpp::sleep_for(std::chrono::milliseconds(50)); + send_goal_request(node_, uuid2_); // deadlock here + t.join(); +}