Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions envoy/http/alternate_protocols_cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,10 +95,14 @@ class AlternateProtocolsCache {
virtual bool isHttp3Broken() const PURE;
// Returns true if HTTP/3 is confirmed to be working.
virtual bool isHttp3Confirmed() const PURE;
// Returns true if HTTP/3 has failed recently.
virtual bool hasHttp3FailedRecently() const PURE;
// Marks HTTP/3 broken for a period of time, subject to backoff.
virtual void markHttp3Broken() PURE;
// Marks HTTP/3 as confirmed to be working and resets the backoff timeout.
virtual void markHttp3Confirmed() PURE;
// Marks HTTP/3 as failed recently.
virtual void markHttp3FailedRecently() PURE;
};

virtual ~AlternateProtocolsCache() = default;
Expand Down
30 changes: 23 additions & 7 deletions source/common/http/conn_pool_grid.cc
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ ConnectivityGrid::StreamCreationResult ConnectivityGrid::WrapperCallbacks::newSt
describePool(**current_), grid_.origin_.hostname_);
auto attempt = std::make_unique<ConnectionAttemptCallbacks>(*this, current_);
LinkedList::moveIntoList(std::move(attempt), connection_attempts_);
if (!next_attempt_timer_->enabled()) {
if (next_attempt_timer_ != nullptr && !next_attempt_timer_->enabled()) {

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought that next_attempt_timer_ was initialized in the constructor and could never be null. Is this check needed?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're right. It's not needed

next_attempt_timer_->enableTimer(grid_.next_attempt_duration_);
}
// Note that in the case of immediate attempt/failure, newStream will delete this.
Expand Down Expand Up @@ -218,9 +218,12 @@ ConnectivityGrid::ConnectivityGrid(
state_(state), next_attempt_duration_(std::chrono::milliseconds(kDefaultTimeoutMs)),
time_source_(time_source), alternate_protocols_(alternate_protocols),
quic_stat_names_(quic_stat_names), scope_(scope),
// TODO(RyanTheOptimist): Figure out how scheme gets plumbed in here.
origin_("https", getSni(transport_socket_options, host_->transportSocketFactory()),
host_->address()->ip()->port()),
quic_info_(quic_info) {
// ProdClusterManagerFactory::allocateConnPool verifies the protocols are HTTP/1, HTTP/2 and
// HTTP/3.
ASSERT(connectivity_options.protocols_.size() == 3);
ASSERT(alternate_protocols);
std::chrono::milliseconds rtt =
Expand Down Expand Up @@ -299,7 +302,14 @@ ConnectionPool::Cancellable* ConnectivityGrid::newStream(Http::ResponseDecoder&
createNextPool();
}
PoolIterator pool = pools_.begin();
if (!shouldAttemptHttp3() || !options.can_use_http3_) {
Instance::StreamOptions overriding_options(options);
bool delay_tcp_attempt{true};

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: we have two initializations, one uses {} and the other (). Could they both use {} (or even = to be more idiomatic)?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

if (shouldAttemptHttp3() && options.can_use_http3_) {
if (getHttp3StatusTracker().hasHttp3FailedRecently()) {
overriding_options.can_send_early_data_ = false;
delay_tcp_attempt = false;
}
} else {
ASSERT(options.can_use_http3_ ||
Runtime::runtimeFeatureEnabled(Runtime::conn_pool_new_stream_with_early_data_and_http3));

Expand All @@ -308,7 +318,7 @@ ConnectionPool::Cancellable* ConnectivityGrid::newStream(Http::ResponseDecoder&
++pool;
}
auto wrapped_callback =
std::make_unique<WrapperCallbacks>(*this, decoder, pool, callbacks, options);
std::make_unique<WrapperCallbacks>(*this, decoder, pool, callbacks, overriding_options);
ConnectionPool::Cancellable* ret = wrapped_callback.get();
LinkedList::moveIntoList(std::move(wrapped_callback), wrapped_callbacks_);
if (wrapped_callbacks_.front()->newStream() == StreamCreationResult::ImmediateResult) {
Expand All @@ -317,6 +327,10 @@ ConnectionPool::Cancellable* ConnectivityGrid::newStream(Http::ResponseDecoder&
// WrappedCallbacks object has also been deleted.
return nullptr;
}
if (!delay_tcp_attempt) {
// Immediately start TCP attempt if HTTP/3 failed recently.
wrapped_callbacks_.front()->tryAnotherConnection();

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we stop the failover timer at this point since there is nothing else to try?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

next_attempt_timer_ is not accessible from the grid, so I left it as is. The alarm should trigger another call to tryAnotherConnection(), which returns early if there is no next pool. We can definitely cancel it explicitly by adding a getter interface, but I'm wondering if it's worth it.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, fair enough!

}
return ret;
}

Expand Down Expand Up @@ -363,10 +377,7 @@ bool ConnectivityGrid::isPoolHttp3(const ConnectionPool::Instance& pool) {

// Looks up the HTTP/3 status tracker for an origin in the alternate protocols cache by calling
// getOrCreateHttp3StatusTracker() and returns a reference to it.
AlternateProtocolsCache::Http3StatusTracker& ConnectivityGrid::getHttp3StatusTracker() const {
// The code below dereferences host_->address()->ip(), so the host address is expected to be an
// IP address; anything else is reported through ENVOY_BUG.
ENVOY_BUG(host_->address()->type() == Network::Address::Type::Ip, "Address is not an IP address");
// TODO(RyanTheOptimist): Figure out how scheme gets plumbed in here.
// Builds a fresh origin from the hard-coded "https" scheme, the host's hostname and its port.
AlternateProtocolsCache::Origin origin("https", host_->hostname(),
host_->address()->ip()->port());
return alternate_protocols_->getOrCreateHttp3StatusTracker(origin);
// NOTE(review): this second return is unreachable as written. It looks like the intended
// replacement for the three lines above (using the origin_ member instead of rebuilding the
// origin on every call) - confirm which version should remain.
return alternate_protocols_->getOrCreateHttp3StatusTracker(origin_);
}

bool ConnectivityGrid::isHttp3Broken() const { return getHttp3StatusTracker().isHttp3Broken(); }
Expand Down Expand Up @@ -449,5 +460,10 @@ void ConnectivityGrid::onHandshakeComplete() {
markHttp3Confirmed();
}

void ConnectivityGrid::onZeroRttHandshakeFailed() {
ENVOY_LOG(trace, "Marking HTTP/3 failed for host '{}'.", host_->hostname());
getHttp3StatusTracker().markHttp3FailedRecently();
}

} // namespace Http
} // namespace Envoy
1 change: 1 addition & 0 deletions source/common/http/conn_pool_grid.h
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@ class ConnectivityGrid : public ConnectionPool::Instance,

// Http3::PoolConnectResultCallback
void onHandshakeComplete() override;
void onZeroRttHandshakeFailed() override;

protected:
// Set the required idle callback on the pool.
Expand Down
4 changes: 4 additions & 0 deletions source/common/http/http3/conn_pool.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,10 @@ class PoolConnectResultCallback {
// Called when the mandatory handshake is complete. This is when a HTTP/3 connection is regarded
// as connected and is able to send requests.
virtual void onHandshakeComplete() PURE;
// Called upon a connection close event from a client which hasn't finished the handshake but has
// already sent early data.
// TODO(danzh) actually call it from h3 pool.
virtual void onZeroRttHandshakeFailed() PURE;
};

// Http3 subclass of FixedHttpConnPoolImpl which exists to store quic data.
Expand Down
11 changes: 7 additions & 4 deletions source/common/http/http3_status_tracker_impl.cc
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
#include "source/common/http/http3_status_tracker_impl.h"

#include <chrono>
#include <functional>

namespace Envoy {
namespace Http {

Expand All @@ -21,6 +18,10 @@ bool Http3StatusTrackerImpl::isHttp3Broken() const { return state_ == State::Bro

bool Http3StatusTrackerImpl::isHttp3Confirmed() const { return state_ == State::Confirmed; }

// Reports whether the tracker is currently in the FailedRecently state.
bool Http3StatusTrackerImpl::hasHttp3FailedRecently() const {
  const bool failed_recently = (state_ == State::FailedRecently);
  return failed_recently;
}

void Http3StatusTrackerImpl::markHttp3Broken() {
state_ = State::Broken;
if (!expiration_timer_->enabled()) {
Expand All @@ -42,11 +43,13 @@ void Http3StatusTrackerImpl::markHttp3Confirmed() {
}
}

// Moves the tracker into the FailedRecently state. Only the state is updated here.
void Http3StatusTrackerImpl::markHttp3FailedRecently() {
  state_ = State::FailedRecently;
}

// Handles expiry of the broken period. If the tracker is still Broken, it moves to
// FailedRecently; in any other state the call is a no-op.
void Http3StatusTrackerImpl::onExpirationTimeout() {
  if (state_ != State::Broken) {
    // The state changed since HTTP/3 was marked broken; leave it as it is.
    return;
  }
  // HTTP/3 is no longer treated as broken, but the failure is remembered so that
  // hasHttp3FailedRecently() reports true. The former intermediate store of State::Pending was
  // overwritten immediately and has been dropped.
  state_ = State::FailedRecently;
}

} // namespace Http
Expand Down
5 changes: 5 additions & 0 deletions source/common/http/http3_status_tracker_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,19 @@ class Http3StatusTrackerImpl : public AlternateProtocolsCache::Http3StatusTracke
bool isHttp3Broken() const override;
// Returns true if HTTP/3 is confirmed to be working.
bool isHttp3Confirmed() const override;
// Returns true if HTTP/3 has failed recently.
bool hasHttp3FailedRecently() const override;
// Marks HTTP/3 broken for a period of time, subject to backoff.
void markHttp3Broken() override;
// Marks HTTP/3 as confirmed to be working and resets the backoff timeout.
void markHttp3Confirmed() override;
// Marks HTTP/3 as failed recently.
void markHttp3FailedRecently() override;

private:
// HTTP/3 status recorded by this tracker.
enum class State {
// NOTE(review): presumably the initial state, before any outcome has been recorded - confirm.
Pending,
// Set by markHttp3FailedRecently(), and by onExpirationTimeout() when it runs while the state
// is Broken. Reported by hasHttp3FailedRecently().
FailedRecently,
// Set by markHttp3Broken(). Reported by isHttp3Broken().
Broken,
// Reported by isHttp3Confirmed().
Confirmed,
};
Expand Down
77 changes: 74 additions & 3 deletions test/common/http/conn_pool_grid_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ class ConnectivityGridForTest : public ConnectivityGrid {
public:
using ConnectivityGrid::ConnectivityGrid;

// Test-only accessor: asks the grid's HTTP/3 status tracker whether HTTP/3 has failed recently.
static bool hasHttp3FailedRecently(const ConnectivityGrid& grid) {
  const auto& tracker = grid.getHttp3StatusTracker();
  return tracker.hasHttp3FailedRecently();
}

// Test-only accessor: forwards to the grid's createNextPool() and hands back its result.
static absl::optional<PoolIterator> forceCreateNextPool(ConnectivityGrid& grid) {
  absl::optional<PoolIterator> next_pool = grid.createNextPool();
  return next_pool;
}
Expand All @@ -48,9 +52,13 @@ class ConnectivityGridForTest : public ConnectivityGrid {
setupPool(*instance);
pools_.push_back(ConnectionPool::InstancePtr{instance});
ON_CALL(*instance, newStream(_, _, _))
.WillByDefault(Invoke(
[&](Http::ResponseDecoder&, ConnectionPool::Callbacks& callbacks,
const ConnectionPool::Instance::StreamOptions&) -> ConnectionPool::Cancellable* {
.WillByDefault(
Invoke([&, &grid = *this](Http::ResponseDecoder&, ConnectionPool::Callbacks& callbacks,
const ConnectionPool::Instance::StreamOptions& options)
-> ConnectionPool::Cancellable* {
if (ConnectivityGridForTest::hasHttp3FailedRecently(grid)) {
EXPECT_FALSE(options.can_send_early_data_);
}
if (immediate_success_) {
callbacks.onPoolReady(*encoder_, host(), *info_, absl::nullopt);
return nullptr;
Expand Down Expand Up @@ -837,6 +845,69 @@ TEST_F(ConnectivityGridTest, SuccessWithoutHttp3NoMatchingAlpn) {
grid_->callbacks()->onPoolReady(encoder_, host_, info_, absl::nullopt);
}

// Test the TCP pool will be immediately attempted if HTTP/3 has failed before.
TEST_F(ConnectivityGridTest, Http3FailedRecentlyThenSucceeds) {
initialize();
addHttp3AlternateProtocol();
grid_->onZeroRttHandshakeFailed();
EXPECT_TRUE(ConnectivityGridForTest::hasHttp3FailedRecently(*grid_));
EXPECT_EQ(grid_->first(), nullptr);

// This timer will be returned and armed as the grid creates the wrapper's failover timer.
Event::MockTimer* failover_timer = new StrictMock<MockTimer>(&dispatcher_);
EXPECT_CALL(*failover_timer, enabled()).WillOnce(Return(false)).WillOnce(Return(true));
EXPECT_CALL(*failover_timer, enableTimer(_, _));
EXPECT_NE(grid_->newStream(decoder_, callbacks_,
{/*can_send_early_data_=*/true,
/*can_use_http3_=*/true}),
nullptr);
EXPECT_NE(grid_->first(), nullptr);
// The 2nd pool should be TCP pool and it should have been created together with h3 pool.
EXPECT_NE(grid_->second(), nullptr);
EXPECT_EQ(2u, grid_->callbacks_.size());

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not quite sure I understand how this tests that the TCP attempt is not delayed?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

By checking EXPECT_NE(grid_->second(), nullptr). The 2nd pool (TCP) is created immediately with the quic pool without waiting for the alarm to fire.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, thanks. I see.


// When the failover timer fires there are no more pools to try, so no new attempt is started.
failover_timer->invokeCallback();
EXPECT_EQ(2u, grid_->callbacks_.size());

// onPoolReady should be passed from the pool back to the original caller.
ASSERT_NE(grid_->callbacks(0), nullptr);
ASSERT_NE(grid_->callbacks(1), nullptr);
EXPECT_CALL(callbacks_.pool_ready_, ready());
EXPECT_CALL(grid_->cancel_, cancel(_));
grid_->callbacks(0)->onPoolReady(encoder_, host_, info_, absl::nullopt);
// Getting onPoolReady() from HTTP/3 pool doesn't change H3 status.
EXPECT_TRUE(ConnectivityGridForTest::hasHttp3FailedRecently(*grid_));
}

// Test that the TCP pool is attempted immediately if HTTP/3 has failed recently, and that a later
// HTTP/3 pool failure marks HTTP/3 as broken.
TEST_F(ConnectivityGridTest, Http3FailedRecentlyThenFailsAgain) {
initialize();
addHttp3AlternateProtocol();
grid_->onZeroRttHandshakeFailed();
EXPECT_TRUE(ConnectivityGridForTest::hasHttp3FailedRecently(*grid_));
EXPECT_EQ(grid_->first(), nullptr);

EXPECT_NE(grid_->newStream(decoder_, callbacks_,
{/*can_send_early_data_=*/true,
/*can_use_http3_=*/true}),
nullptr);
EXPECT_NE(grid_->first(), nullptr);
EXPECT_NE(grid_->second(), nullptr);

// onPoolReady should be passed from the pool back to the original caller.
ASSERT_NE(grid_->callbacks(0), nullptr);
ASSERT_NE(grid_->callbacks(1), nullptr);
EXPECT_CALL(callbacks_.pool_ready_, ready());
grid_->callbacks(1)->onPoolReady(encoder_, host_, info_, absl::nullopt);
// Getting onPoolReady() from TCP pool alone doesn't change H3 status.
EXPECT_TRUE(ConnectivityGridForTest::hasHttp3FailedRecently(*grid_));
// Getting onPoolFailure() from Http3 pool later should mark H3 broken.
grid_->callbacks(0)->onPoolFailure(ConnectionPool::PoolFailureReason::LocalConnectionFailure,
"reason", host_);
EXPECT_TRUE(grid_->isHttp3Broken());
}

#ifdef ENVOY_ENABLE_QUIC

} // namespace
Expand Down
1 change: 1 addition & 0 deletions test/common/http/http3/conn_pool_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ class Http3ConnPoolImplPeer {
class MockPoolConnectResultCallback : public PoolConnectResultCallback {
public:
MOCK_METHOD(void, onHandshakeComplete, ());
MOCK_METHOD(void, onZeroRttHandshakeFailed, ());
};

class Http3ConnPoolImplTest : public Event::TestUsingSimulatedTime, public testing::Test {
Expand Down
20 changes: 20 additions & 0 deletions test/common/http/http3_status_tracker_impl_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ TEST_F(Http3StatusTrackerImplTest, MarkBroken) {
tracker_.markHttp3Broken();
EXPECT_TRUE(tracker_.isHttp3Broken());
EXPECT_FALSE(tracker_.isHttp3Confirmed());
EXPECT_FALSE(tracker_.hasHttp3FailedRecently());
}

TEST_F(Http3StatusTrackerImplTest, MarkBrokenRepeatedly) {
Expand All @@ -60,6 +61,7 @@ TEST_F(Http3StatusTrackerImplTest, MarkBrokenThenExpires) {
timer_->invokeCallback();
EXPECT_FALSE(tracker_.isHttp3Broken());
EXPECT_FALSE(tracker_.isHttp3Confirmed());
EXPECT_TRUE(tracker_.hasHttp3FailedRecently());
}

TEST_F(Http3StatusTrackerImplTest, MarkBrokenWithBackoff) {
Expand Down Expand Up @@ -150,6 +152,7 @@ TEST_F(Http3StatusTrackerImplTest, MarkBrokenThenExpiresThenConfirmedThenBroken)
EXPECT_CALL(*timer_, enabled()).WillOnce(Return(false));
tracker_.markHttp3Confirmed();
EXPECT_FALSE(tracker_.isHttp3Broken());
EXPECT_FALSE(tracker_.hasHttp3FailedRecently());
EXPECT_TRUE(tracker_.isHttp3Confirmed());

// markConfirmed will have reset the timeout back to the initial value.
Expand All @@ -158,6 +161,7 @@ TEST_F(Http3StatusTrackerImplTest, MarkBrokenThenExpiresThenConfirmedThenBroken)
tracker_.markHttp3Broken();

EXPECT_TRUE(tracker_.isHttp3Broken());
EXPECT_FALSE(tracker_.hasHttp3FailedRecently());
EXPECT_FALSE(tracker_.isHttp3Confirmed());
}

Expand All @@ -170,9 +174,25 @@ TEST_F(Http3StatusTrackerImplTest, MarkBrokenThenConfirmed) {
EXPECT_CALL(*timer_, enabled()).WillOnce(Return(false));
tracker_.markHttp3Confirmed();
EXPECT_FALSE(tracker_.isHttp3Broken());
EXPECT_FALSE(tracker_.hasHttp3FailedRecently());
EXPECT_TRUE(tracker_.isHttp3Confirmed());
}

TEST_F(Http3StatusTrackerImplTest, MarkFailedRecentlyAndThenBroken) {
tracker_.markHttp3FailedRecently();
EXPECT_TRUE(tracker_.hasHttp3FailedRecently());
EXPECT_FALSE(tracker_.isHttp3Broken());
EXPECT_FALSE(tracker_.isHttp3Confirmed());

EXPECT_CALL(*timer_, enabled()).WillOnce(Return(false));
EXPECT_CALL(*timer_, enableTimer(std::chrono::milliseconds(5 * 60 * 1000), nullptr));

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we also test what happens when the timer fires? Or, maybe we tested that earlier?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We tested in MarkBrokenThenExpires that when timer fires, isHttp3Broken() returns false and hasHttp3FailedRecently() returns true

tracker_.markHttp3Broken();

EXPECT_TRUE(tracker_.isHttp3Broken());
EXPECT_FALSE(tracker_.isHttp3Confirmed());
EXPECT_FALSE(tracker_.hasHttp3FailedRecently());
}

} // namespace
} // namespace Http
} // namespace Envoy