-
Notifications
You must be signed in to change notification settings - Fork 1.5k
Do not throw from PartitionedOutputBufferManager::getData #3502
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
4391bcf
9fd12d4
28f295c
29b779f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -225,17 +225,19 @@ class PartitionedOutputBufferManager { | |
| void deleteResults(const std::string& taskId, int destination); | ||
|
|
||
| // Adds up to 'maxBytes' bytes worth of data for 'destination' from | ||
| // 'taskId'. The sequence number of the data must be >= | ||
| // 'sequence'. If there is no data, 'notify' will be registered and | ||
| // called when there is data or the source is at end. Existing data | ||
| // with a sequence number < sequence is deleted. The caller is | ||
| // 'taskId'. The sequence number of the data must be >= 'sequence'. | ||
| // If there is no buffer associated with the given taskId, returns false. | ||
| // If there is no data, 'notify' will be registered and | ||
| // called when there is data or the source is at end, the function returns | ||
| // true. | ||
| // Existing data with a sequence number < sequence is deleted. The caller is | ||
| // expected to increment the sequence number between calls by the | ||
| // number of items received. In this way the next call implicitly | ||
| // acknowledges receipt of the results from the previous. The | ||
| // acknowledge method is offered for an early ack, so that the | ||
| // producer can continue before the consumer is done processing the | ||
| // received data. | ||
| void getData( | ||
| bool getData( | ||
| const std::string& taskId, | ||
| int destination, | ||
| uint64_t maxBytes, | ||
|
|
@@ -264,8 +266,14 @@ class PartitionedOutputBufferManager { | |
|
|
||
| private: | ||
| // Retrieves the set of buffers for a query. | ||
| // Throws an exception if buffer doesn't exist. | ||
| std::shared_ptr<PartitionedOutputBuffer> getBuffer(const std::string& taskId); | ||
|
|
||
| // Retrieves the set of buffers for a query if exists. | ||
| // Returns NULL if task not found. | ||
| std::shared_ptr<PartitionedOutputBuffer> getBufferIfExists( | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a nice method. Please, add empty line before this method and document it. Let's use this method in PartitionedOutputBufferManager::deleteResults as well. |
||
| const std::string& taskId); | ||
|
|
||
| folly::Synchronized< | ||
| std::unordered_map<std::string, std::shared_ptr<PartitionedOutputBuffer>>, | ||
| std::mutex> | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -108,7 +108,7 @@ class PartitionedOutputBufferManagerTest : public testing::Test { | |
| uint64_t maxBytes = 1024, | ||
| int expectedGroups = 1) { | ||
| bool receivedData = false; | ||
| bufferManager_->getData( | ||
| ASSERT_TRUE(bufferManager_->getData( | ||
| taskId, | ||
| destination, | ||
| maxBytes, | ||
|
|
@@ -124,7 +124,7 @@ class PartitionedOutputBufferManagerTest : public testing::Test { | |
| } | ||
| EXPECT_EQ(inSequence, sequence) << "for destination " << destination; | ||
| receivedData = true; | ||
| }); | ||
| })); | ||
| EXPECT_TRUE(receivedData) << "for destination " << destination; | ||
| } | ||
|
|
||
|
|
@@ -163,12 +163,12 @@ class PartitionedOutputBufferManagerTest : public testing::Test { | |
| void | ||
| fetchEndMarker(const std::string& taskId, int destination, int64_t sequence) { | ||
| bool receivedData = false; | ||
| bufferManager_->getData( | ||
| ASSERT_TRUE(bufferManager_->getData( | ||
| taskId, | ||
| destination, | ||
| std::numeric_limits<uint64_t>::max(), | ||
| sequence, | ||
| receiveEndMarker(destination, sequence, receivedData)); | ||
| receiveEndMarker(destination, sequence, receivedData))); | ||
| EXPECT_TRUE(receivedData) << "for destination " << destination; | ||
| bufferManager_->deleteResults(taskId, destination); | ||
| } | ||
|
|
@@ -183,12 +183,12 @@ class PartitionedOutputBufferManagerTest : public testing::Test { | |
| int64_t sequence, | ||
| bool& receivedEndMarker) { | ||
| receivedEndMarker = false; | ||
| bufferManager_->getData( | ||
| ASSERT_TRUE(bufferManager_->getData( | ||
| taskId, | ||
| destination, | ||
| std::numeric_limits<uint64_t>::max(), | ||
| sequence, | ||
| receiveEndMarker(destination, 1, receivedEndMarker)); | ||
| receiveEndMarker(destination, 1, receivedEndMarker))); | ||
| EXPECT_FALSE(receivedEndMarker) << "for destination " << destination; | ||
| } | ||
|
|
||
|
|
@@ -219,12 +219,12 @@ class PartitionedOutputBufferManagerTest : public testing::Test { | |
| int expectedGroups, | ||
| bool& receivedData) { | ||
| receivedData = false; | ||
| bufferManager_->getData( | ||
| ASSERT_TRUE(bufferManager_->getData( | ||
| taskId, | ||
| destination, | ||
| 1024, | ||
| sequence, | ||
| receiveData(destination, sequence, expectedGroups, receivedData)); | ||
| receiveData(destination, sequence, expectedGroups, receivedData))); | ||
| EXPECT_FALSE(receivedData) << "for destination " << destination; | ||
| } | ||
|
|
||
|
|
@@ -420,3 +420,17 @@ TEST_F(PartitionedOutputBufferManagerTest, serializedPage) { | |
| EXPECT_EQ(mappedMemory->allocateBytesStats().totalSmall, 0); | ||
| } | ||
| } | ||
|
|
||
| TEST_F(PartitionedOutputBufferManagerTest, getDataOnFailedTask) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's update existing calls to getData to assert that they return true. |
||
| // Fetching data on a task which was either never initialized in the buffer | ||
| // manager or was removed by a parallel thread must return false. The `notify` | ||
| // callback must not be registered. | ||
| ASSERT_FALSE(bufferManager_->getData( | ||
| "test.0.1", | ||
| 1, | ||
| 10, | ||
| 1, | ||
| [](std::vector<std::unique_ptr<folly::IOBuf>> pages, int64_t sequence) { | ||
| VELOX_UNREACHABLE(); | ||
| })); | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Let's clarify that this method throws is buffer for the specified task does not exist.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I still think we should clarify that this method throws when task is not found.