-
Notifications
You must be signed in to change notification settings - Fork 5.4k
Tcp flow control #1217
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Tcp flow control #1217
Changes from 32 commits
cbe959f
f2ffecb
0a9116d
24b56df
2cb1ebf
f07608b
36ecdc3
ccea6d4
b846116
8fd62ca
1036f4b
ea9aebb
e1a91fa
9bbc0f4
f5ce5f4
73f80d3
62098a4
e15f458
5b514b8
f9603f7
c0bbd2d
8f18115
c604950
31b46da
872e1b3
f373704
71792b3
c6e50dc
c925fee
3bf7617
b8216e9
d55f9eb
b79fc5e
531b61e
69a8e33
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,89 @@ | ||
| #include "common/buffer/watermark_buffer.h" | ||
|
|
||
| #include "common/common/assert.h" | ||
|
|
||
| namespace Envoy { | ||
| namespace Buffer { | ||
|
|
||
| void WatermarkBuffer::add(const void* data, uint64_t size) { | ||
| wrapped_buffer_->add(data, size); | ||
| checkHighWatermark(); | ||
| } | ||
|
|
||
| void WatermarkBuffer::add(const std::string& data) { | ||
| wrapped_buffer_->add(data); | ||
| checkHighWatermark(); | ||
| } | ||
|
|
||
| void WatermarkBuffer::add(const Instance& data) { | ||
| wrapped_buffer_->add(data); | ||
| checkHighWatermark(); | ||
| } | ||
|
|
||
| void WatermarkBuffer::commit(RawSlice* iovecs, uint64_t num_iovecs) { | ||
| wrapped_buffer_->commit(iovecs, num_iovecs); | ||
| checkHighWatermark(); | ||
| } | ||
|
|
||
| void WatermarkBuffer::drain(uint64_t size) { | ||
| wrapped_buffer_->drain(size); | ||
| checkLowWatermark(); | ||
| } | ||
|
|
||
| void WatermarkBuffer::move(Instance& rhs) { | ||
| wrapped_buffer_->move(rhs); | ||
| checkHighWatermark(); | ||
| } | ||
|
|
||
| void WatermarkBuffer::move(Instance& rhs, uint64_t length) { | ||
| wrapped_buffer_->move(rhs, length); | ||
| checkHighWatermark(); | ||
| } | ||
|
|
||
| int WatermarkBuffer::read(int fd, uint64_t max_length) { | ||
| int bytes_read = wrapped_buffer_->read(fd, max_length); | ||
| checkHighWatermark(); | ||
| return bytes_read; | ||
| } | ||
|
|
||
| uint64_t WatermarkBuffer::reserve(uint64_t length, RawSlice* iovecs, uint64_t num_iovecs) { | ||
| uint64_t bytes_reserved = wrapped_buffer_->reserve(length, iovecs, num_iovecs); | ||
| checkHighWatermark(); | ||
| return bytes_reserved; | ||
| } | ||
|
|
||
| int WatermarkBuffer::write(int fd) { | ||
| int bytes_written = wrapped_buffer_->write(fd); | ||
| checkLowWatermark(); | ||
| return bytes_written; | ||
| } | ||
|
|
||
| void WatermarkBuffer::setWatermarks(uint32_t low_watermark, uint32_t high_watermark) { | ||
| ASSERT(low_watermark < high_watermark); | ||
| low_watermark_ = low_watermark; | ||
| high_watermark_ = high_watermark; | ||
| checkHighWatermark(); | ||
| checkLowWatermark(); | ||
| } | ||
|
|
||
| void WatermarkBuffer::checkLowWatermark() { | ||
| if (!above_high_watermark_called_ || wrapped_buffer_->length() >= low_watermark_) { | ||
| return; | ||
| } | ||
|
|
||
| above_high_watermark_called_ = false; | ||
| below_low_watermark_(); | ||
| } | ||
|
|
||
| void WatermarkBuffer::checkHighWatermark() { | ||
| if (above_high_watermark_called_ || high_watermark_ == 0 || | ||
| wrapped_buffer_->length() <= high_watermark_) { | ||
| return; | ||
| } | ||
|
|
||
| above_high_watermark_called_ = true; | ||
| above_high_watermark_(); | ||
| } | ||
|
|
||
| } // namespace Buffer | ||
| } // namespace Envoy |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,75 @@ | ||
| #pragma once | ||
|
|
||
| #include <string> | ||
|
|
||
| #include "common/buffer/buffer_impl.h" | ||
|
|
||
| namespace Envoy { | ||
| namespace Buffer { | ||
|
|
||
| // A wrapper for an underlying buffer which does watermark validation. | ||
| // The underlying buffer's ownership is transfered to the Watermark buffer. Each time the inner | ||
| // buffer is resized (written to or drained), the watermarks are checked. As the buffer size | ||
| // transitions from under the low watermark to above the high watermark, the above_high_watermark | ||
| // function is called one time. It will not be called again until the buffer is drained below the | ||
| // low watermark, at which point the below_low_watermark function is called. | ||
| // | ||
| // Because the internals of OwnedImpl::move() require accessing the underlying data, OwnedImpl is | ||
| // not compatible with generic Buffer::Impls. To allow compatability between WatermarkBuffer and | ||
| // OwnedImpl::move, WatermarkBuffer must implement LibEventInstance and is also not compatible | ||
| // with generic Buffer::Impls. | ||
| // | ||
| // WatermarkBuffer takes a pointer to a generic InstancePtr in the constructor to allow test mocks | ||
| // which overrides move() in any case. | ||
| class WatermarkBuffer : public LibEventInstance { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If this is a wrapper around a buffer, does it have to derive from LibEventInstance ? Are the changes to introduce LibEventInstance necessary? (If they are the below static_cast should probably be a dynamic_cast, but it's not clear to me that it's necessary).
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe you just answered my comment with your other comment. If so, maybe add more code comments? |
||
| public: | ||
| WatermarkBuffer(InstancePtr&& buffer, std::function<void()> below_low_watermark, | ||
| std::function<void()> above_high_watermark) | ||
| : wrapped_buffer_(std::move(buffer)), below_low_watermark_(below_low_watermark), | ||
| above_high_watermark_(above_high_watermark) {} | ||
|
|
||
| // Instance | ||
| void add(const void* data, uint64_t size) override; | ||
| void add(const std::string& data) override; | ||
| void add(const Instance& data) override; | ||
| void commit(RawSlice* iovecs, uint64_t num_iovecs) override; | ||
| void drain(uint64_t size) override; | ||
| uint64_t getRawSlices(RawSlice* out, uint64_t out_size) const override { | ||
| return wrapped_buffer_->getRawSlices(out, out_size); | ||
| } | ||
| uint64_t length() const override { return wrapped_buffer_->length(); } | ||
| void* linearize(uint32_t size) override { return wrapped_buffer_->linearize(size); } | ||
| void move(Instance& rhs) override; | ||
| void move(Instance& rhs, uint64_t length) override; | ||
| int read(int fd, uint64_t max_length) override; | ||
| uint64_t reserve(uint64_t length, RawSlice* iovecs, uint64_t num_iovecs) override; | ||
| ssize_t search(const void* data, uint64_t size, size_t start) const override { | ||
| return wrapped_buffer_->search(data, size, start); | ||
| } | ||
| int write(int fd) override; | ||
| Event::Libevent::BufferPtr& buffer() override { | ||
| return static_cast<LibEventInstance&>(*wrapped_buffer_).buffer(); | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @htuch as discussed this solves the problem where move didn't work in both directions (now tested) because OwnedImpl assumed it was moving another OwnedImpl. The down-side is that WatermarkBuffer now only can do move()s if it wraps an OwnedImpl. That said the Envoy code base already assumes all buffers are OwnedImpls so I don't think we're losing any flexibility, it's just ugly from a purist perspective. |
||
| } | ||
|
|
||
| void setWatermarks(uint32_t low_watermark, uint32_t high_watermark); | ||
|
|
||
| private: | ||
| void checkHighWatermark(); | ||
| void checkLowWatermark(); | ||
|
|
||
| InstancePtr wrapped_buffer_; | ||
| std::function<void()> below_low_watermark_; | ||
| std::function<void()> above_high_watermark_; | ||
|
|
||
| // Used for enforcing buffer limits (off by default). If these are set to non-zero by a call to | ||
| // setWatermarks() the watermark callbacks will be called as described above. | ||
| uint32_t high_watermark_{0}; | ||
| uint32_t low_watermark_{0}; | ||
| // Tracks the latest state of watermark callbacks. | ||
| // True between the time above_high_watermark_ has been called until above_high_watermark_ has | ||
| // been called. | ||
| bool above_high_watermark_called_{false}; | ||
| }; | ||
|
|
||
| } // namespace Buffer | ||
| } // namespace Envoy | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -10,6 +10,7 @@ | |
| #include "envoy/network/listen_socket.h" | ||
| #include "envoy/network/listener.h" | ||
|
|
||
| #include "common/buffer/buffer_impl.h" | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this needed?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For OwnedImplFactory. |
||
| #include "common/event/file_event_impl.h" | ||
| #include "common/event/signal_impl.h" | ||
| #include "common/event/timer_impl.h" | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -123,6 +123,58 @@ void TcpProxy::initializeReadFilterCallbacks(Network::ReadFilterCallbacks& callb | |
| config_->stats().downstream_cx_tx_bytes_buffered_}); | ||
| } | ||
|
|
||
| void TcpProxy::readDisableUpstream(bool disable) { | ||
| upstream_connection_->readDisable(disable); | ||
| if (disable) { | ||
| read_callbacks_->upstreamHost() | ||
| ->cluster() | ||
| .stats() | ||
| .upstream_flow_control_paused_reading_total_.inc(); | ||
| } else { | ||
| read_callbacks_->upstreamHost() | ||
| ->cluster() | ||
| .stats() | ||
| .upstream_flow_control_resumed_reading_total_.inc(); | ||
| } | ||
| } | ||
|
|
||
| void TcpProxy::readDisableDownstream(bool disable) { | ||
| read_callbacks_->connection().readDisable(disable); | ||
| if (disable) { | ||
| config_->stats().downstream_flow_control_paused_reading_total_.inc(); | ||
| } else { | ||
| config_->stats().downstream_flow_control_resumed_reading_total_.inc(); | ||
| } | ||
| } | ||
|
|
||
| void TcpProxy::DownstreamCallbacks::onAboveWriteBufferHighWatermark() { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is it worth it here to add ASSERTS in all of these functions that basically assert that we aren't getting multiple watermark callbacks? Or can that happen? I'm thinking something along the lines of
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. connections can have the read disabled by other means so if we check the actual state of the connection we can only verify that we enable actually disabled sockets (not that we disable enabled sockets) We could track local state by adding 2 booleans to the tcp proxy filter which are only used for debug, which I'd be happy to do - flow control is tricky enough to get right I'm happy to have extra asserts too!
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would be in favor of adding the booleans (at least for now, with a TODO to clean them up once we have production experience) since this code is super tricky to get right and more checking is better IMO. I will leave it up to you to decide either way.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Works for me. Flow control changes are inherently dangerous as you point out. |
||
| ASSERT(!on_high_watermark_called_); | ||
| on_high_watermark_called_ = true; | ||
| // If downstream has too much data buffered, stop reading on the upstream connection. | ||
| parent_.readDisableUpstream(true); | ||
| } | ||
|
|
||
| void TcpProxy::DownstreamCallbacks::onBelowWriteBufferLowWatermark() { | ||
| ASSERT(on_high_watermark_called_); | ||
| on_high_watermark_called_ = false; | ||
| // The downstream buffer has been drained. Resume reading from upstream. | ||
| parent_.readDisableUpstream(false); | ||
| } | ||
|
|
||
| void TcpProxy::UpstreamCallbacks::onAboveWriteBufferHighWatermark() { | ||
| ASSERT(!on_high_watermark_called_); | ||
| on_high_watermark_called_ = true; | ||
| // There's too much data buffered in the upstream write buffer, so stop reading. | ||
| parent_.readDisableDownstream(true); | ||
| } | ||
|
|
||
| void TcpProxy::UpstreamCallbacks::onBelowWriteBufferLowWatermark() { | ||
| ASSERT(on_high_watermark_called_); | ||
| on_high_watermark_called_ = false; | ||
| // The upstream write buffer is drained. Resume reading. | ||
| parent_.readDisableDownstream(false); | ||
| } | ||
|
|
||
| Network::FilterStatus TcpProxy::initializeUpstreamConnection() { | ||
| const std::string& cluster_name = config_->getRouteFromEntries(read_callbacks_->connection()); | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Matt's earlier comment re:utilities was spot on - once I started on h2 flow control I found myself copy-pasting watermark checks so pulled this out. I believe it will be reusable for at least some of the codec buffers. Either way it guards us from screwing up and missing buffer size changes changes (I missed one in ssl/connection_impl.cc before my own tests caught me) because all edits are done through the wrapper.
@htuch let me know what you think before I go too crazy with the unit tests :-)