Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 11 additions & 8 deletions source/common/common/backoff_strategy.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,25 @@ namespace Envoy {

JitteredBackOffStrategy::JitteredBackOffStrategy(uint64_t base_interval, uint64_t max_interval,
Runtime::RandomGenerator& random)
: base_interval_(base_interval), max_interval_(max_interval), random_(random) {
: base_interval_(base_interval), max_interval_(max_interval), next_interval_(base_interval),
random_(random) {
ASSERT(base_interval_ > 0);
ASSERT(base_interval_ <= max_interval_);
}

uint64_t JitteredBackOffStrategy::nextBackOffMs() {
const uint64_t multiplier = (1 << current_retry_) - 1;
const uint64_t base_backoff = multiplier * base_interval_;
if (base_backoff <= max_interval_) {
current_retry_++;
const uint64_t backoff = next_interval_;
ASSERT(backoff > 0);
// Set next_interval_ to max_interval_ if doubling the interval would exceed the max or overflow.
if (next_interval_ < max_interval_ / 2) {
next_interval_ *= 2;
} else {
next_interval_ = max_interval_;
}
ASSERT(base_backoff > 0);
return std::min(random_.random() % base_backoff, max_interval_);
return std::min(random_.random() % backoff, max_interval_);
}

void JitteredBackOffStrategy::reset() { current_retry_ = 1; }
void JitteredBackOffStrategy::reset() { next_interval_ = base_interval_; }

FixedBackOffStrategy::FixedBackOffStrategy(uint64_t interval_ms) : interval_ms_(interval_ms) {
ASSERT(interval_ms_ > 0);
Expand Down
2 changes: 1 addition & 1 deletion source/common/common/backoff_strategy.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ class JitteredBackOffStrategy : public BackOffStrategy {
private:
const uint64_t base_interval_;
const uint64_t max_interval_{};
uint64_t current_retry_{1};
uint64_t next_interval_;
Runtime::RandomGenerator& random_;
};

Expand Down
39 changes: 30 additions & 9 deletions test/common/common/backoff_strategy_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,31 +30,52 @@ TEST(BackOffStrategyTest, JitteredBackOffBasicReset) {
EXPECT_EQ(2, jittered_back_off.nextBackOffMs()); // Should start from start
}

TEST(BackOffStrategyTest, JitteredBackOffDoesntOverflow) {
NiceMock<Runtime::MockRandomGenerator> random;
ON_CALL(random, random()).WillByDefault(Return(std::numeric_limits<uint64_t>::max() - 1));

JitteredBackOffStrategy jittered_back_off(1, std::numeric_limits<uint64_t>::max(), random);
for (int iter = 0; iter < 100; ++iter) {
EXPECT_GE(std::numeric_limits<uint64_t>::max(), jittered_back_off.nextBackOffMs());
}
EXPECT_EQ(std::numeric_limits<uint64_t>::max() - 1, jittered_back_off.nextBackOffMs());
}

TEST(BackOffStrategyTest, JitteredBackOffWithMaxInterval) {
NiceMock<Runtime::MockRandomGenerator> random;
ON_CALL(random, random()).WillByDefault(Return(1024));
ON_CALL(random, random()).WillByDefault(Return(9999));

JitteredBackOffStrategy jittered_back_off(5, 100, random);
EXPECT_EQ(4, jittered_back_off.nextBackOffMs());
EXPECT_EQ(4, jittered_back_off.nextBackOffMs());
EXPECT_EQ(9, jittered_back_off.nextBackOffMs());
EXPECT_EQ(49, jittered_back_off.nextBackOffMs());
EXPECT_EQ(94, jittered_back_off.nextBackOffMs());
EXPECT_EQ(94, jittered_back_off.nextBackOffMs()); // Should return Max here
EXPECT_EQ(19, jittered_back_off.nextBackOffMs());
EXPECT_EQ(39, jittered_back_off.nextBackOffMs());
EXPECT_EQ(79, jittered_back_off.nextBackOffMs());
EXPECT_EQ(99, jittered_back_off.nextBackOffMs()); // Should return Max here
EXPECT_EQ(99, jittered_back_off.nextBackOffMs());
}

TEST(BackOffStrategyTest, JitteredBackOffWithMaxIntervalReset) {
NiceMock<Runtime::MockRandomGenerator> random;
ON_CALL(random, random()).WillByDefault(Return(1024));
ON_CALL(random, random()).WillByDefault(Return(9999));

JitteredBackOffStrategy jittered_back_off(5, 100, random);
EXPECT_EQ(4, jittered_back_off.nextBackOffMs());
EXPECT_EQ(4, jittered_back_off.nextBackOffMs());
EXPECT_EQ(9, jittered_back_off.nextBackOffMs());
EXPECT_EQ(49, jittered_back_off.nextBackOffMs());
EXPECT_EQ(19, jittered_back_off.nextBackOffMs());
EXPECT_EQ(39, jittered_back_off.nextBackOffMs());
EXPECT_EQ(79, jittered_back_off.nextBackOffMs());
EXPECT_EQ(99, jittered_back_off.nextBackOffMs()); // Should return Max here
EXPECT_EQ(99, jittered_back_off.nextBackOffMs());

jittered_back_off.reset();
EXPECT_EQ(4, jittered_back_off.nextBackOffMs()); // Should start from start
EXPECT_EQ(4, jittered_back_off.nextBackOffMs());
EXPECT_EQ(9, jittered_back_off.nextBackOffMs());
EXPECT_EQ(19, jittered_back_off.nextBackOffMs());
EXPECT_EQ(39, jittered_back_off.nextBackOffMs());
EXPECT_EQ(79, jittered_back_off.nextBackOffMs());
EXPECT_EQ(99, jittered_back_off.nextBackOffMs()); // Should return Max here
EXPECT_EQ(99, jittered_back_off.nextBackOffMs());
}

TEST(BackOffStrategyTest, FixedBackOffBasicReset) {
Expand Down
40 changes: 26 additions & 14 deletions test/common/router/retry_state_impl_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -834,21 +834,21 @@ TEST_F(RouterRetryStateImplTest, Backoff) {
setup(request_headers);
EXPECT_TRUE(state_->enabled());

EXPECT_CALL(random_, random()).WillOnce(Return(49));
EXPECT_CALL(random_, random()).WillOnce(Return(190));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

qq: why does this test need to change as well as the HDS test? It seems like this fix shouldn't have any behavior change in the normal case?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The issue is related to this part of the original implementation that traces back to the first revision of Envoy in github:
uint32_t multiplier = (1 << current_retry_) - 1;

This is not increasing backoff by a factor of 2 each time. I think that what was really intended was:
uint32_t multiplier = 1 << (current_retry_ - 1);

Current backoff sequence: 1, 3, 7, 15...
New backoff sequence: 1, 2, 4, 8, 16...
The biggest difference is the jump from increase from 1 to 3x instead of 2x at the first step. Replicating the current behavior is possible, but the code would be slightly more complicated.

Some discussion about it here:
#3791 (comment)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah OK got it, makes sense, thanks.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Of course, thanks for digging deeper on unexplained code changes.

retry_timer_ = new Event::MockTimer(&dispatcher_);
EXPECT_CALL(*retry_timer_, enableTimer(std::chrono::milliseconds(24), _));
EXPECT_CALL(*retry_timer_, enableTimer(std::chrono::milliseconds(15), _));
EXPECT_EQ(RetryStatus::Yes, state_->shouldRetryReset(connect_failure_, callback_));
EXPECT_CALL(callback_ready_, ready());
retry_timer_->invokeCallback();

EXPECT_CALL(random_, random()).WillOnce(Return(149));
EXPECT_CALL(*retry_timer_, enableTimer(std::chrono::milliseconds(74), _));
EXPECT_CALL(random_, random()).WillOnce(Return(190));
EXPECT_CALL(*retry_timer_, enableTimer(std::chrono::milliseconds(40), _));
EXPECT_EQ(RetryStatus::Yes, state_->shouldRetryReset(connect_failure_, callback_));
EXPECT_CALL(callback_ready_, ready());
retry_timer_->invokeCallback();

EXPECT_CALL(random_, random()).WillOnce(Return(349));
EXPECT_CALL(*retry_timer_, enableTimer(std::chrono::milliseconds(174), _));
EXPECT_CALL(random_, random()).WillOnce(Return(190));
EXPECT_CALL(*retry_timer_, enableTimer(std::chrono::milliseconds(90), _));
EXPECT_EQ(RetryStatus::Yes, state_->shouldRetryReset(connect_failure_, callback_));
EXPECT_CALL(callback_ready_, ready());
retry_timer_->invokeCallback();
Expand Down Expand Up @@ -879,19 +879,25 @@ TEST_F(RouterRetryStateImplTest, CustomBackOffInterval) {
retry_timer_->invokeCallback();

EXPECT_CALL(random_, random()).WillOnce(Return(350));
EXPECT_CALL(*retry_timer_, enableTimer(std::chrono::milliseconds(50), _));
EXPECT_CALL(*retry_timer_, enableTimer(std::chrono::milliseconds(150), _));
EXPECT_EQ(RetryStatus::Yes, state_->shouldRetryReset(connect_failure_, callback_));
EXPECT_CALL(callback_ready_, ready());
retry_timer_->invokeCallback();

EXPECT_CALL(random_, random()).WillOnce(Return(751));
EXPECT_CALL(*retry_timer_, enableTimer(std::chrono::milliseconds(51), _));
EXPECT_CALL(*retry_timer_, enableTimer(std::chrono::milliseconds(351), _));
EXPECT_EQ(RetryStatus::Yes, state_->shouldRetryReset(connect_failure_, callback_));
EXPECT_CALL(callback_ready_, ready());
retry_timer_->invokeCallback();

EXPECT_CALL(random_, random()).WillOnce(Return(2399));
EXPECT_CALL(*retry_timer_, enableTimer(std::chrono::milliseconds(799), _));
EXPECT_EQ(RetryStatus::Yes, state_->shouldRetryReset(connect_failure_, callback_));
EXPECT_CALL(callback_ready_, ready());
retry_timer_->invokeCallback();

EXPECT_CALL(random_, random()).WillOnce(Return(1499));
EXPECT_CALL(*retry_timer_, enableTimer(std::chrono::milliseconds(1200), _));
EXPECT_CALL(random_, random()).WillOnce(Return(2399));
EXPECT_CALL(*retry_timer_, enableTimer(std::chrono::milliseconds(1199), _));
EXPECT_EQ(RetryStatus::Yes, state_->shouldRetryReset(connect_failure_, callback_));
EXPECT_CALL(callback_ready_, ready());
retry_timer_->invokeCallback();
Expand All @@ -914,19 +920,25 @@ TEST_F(RouterRetryStateImplTest, CustomBackOffIntervalDefaultMax) {
retry_timer_->invokeCallback();

EXPECT_CALL(random_, random()).WillOnce(Return(350));
EXPECT_CALL(*retry_timer_, enableTimer(std::chrono::milliseconds(50), _));
EXPECT_CALL(*retry_timer_, enableTimer(std::chrono::milliseconds(150), _));
EXPECT_EQ(RetryStatus::Yes, state_->shouldRetryReset(connect_failure_, callback_));
EXPECT_CALL(callback_ready_, ready());
retry_timer_->invokeCallback();

EXPECT_CALL(random_, random()).WillOnce(Return(751));
EXPECT_CALL(*retry_timer_, enableTimer(std::chrono::milliseconds(51), _));
EXPECT_CALL(*retry_timer_, enableTimer(std::chrono::milliseconds(351), _));
EXPECT_EQ(RetryStatus::Yes, state_->shouldRetryReset(connect_failure_, callback_));
EXPECT_CALL(callback_ready_, ready());
retry_timer_->invokeCallback();

EXPECT_CALL(random_, random()).WillOnce(Return(2999));
EXPECT_CALL(*retry_timer_, enableTimer(std::chrono::milliseconds(599), _));
EXPECT_EQ(RetryStatus::Yes, state_->shouldRetryReset(connect_failure_, callback_));
EXPECT_CALL(callback_ready_, ready());
retry_timer_->invokeCallback();

EXPECT_CALL(random_, random()).WillOnce(Return(1499));
EXPECT_CALL(*retry_timer_, enableTimer(std::chrono::milliseconds(1000), _));
EXPECT_CALL(random_, random()).WillOnce(Return(2999));
EXPECT_CALL(*retry_timer_, enableTimer(std::chrono::milliseconds(999), _));
EXPECT_EQ(RetryStatus::Yes, state_->shouldRetryReset(connect_failure_, callback_));
EXPECT_CALL(callback_ready_, ready());
retry_timer_->invokeCallback();
Expand Down
10 changes: 5 additions & 5 deletions test/common/upstream/hds_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -285,12 +285,12 @@ TEST_F(HdsTest, TestStreamConnectionFailure) {
.WillOnce(Return(nullptr))
.WillOnce(Return(&async_stream_));

EXPECT_CALL(random_, random()).WillOnce(Return(1000005)).WillRepeatedly(Return(1234567));
EXPECT_CALL(random_, random()).WillOnce(Return(1000005)).WillRepeatedly(Return(654321));
EXPECT_CALL(*retry_timer_, enableTimer(std::chrono::milliseconds(5), _));
EXPECT_CALL(*retry_timer_, enableTimer(std::chrono::milliseconds(1567), _));
EXPECT_CALL(*retry_timer_, enableTimer(std::chrono::milliseconds(2567), _));
EXPECT_CALL(*retry_timer_, enableTimer(std::chrono::milliseconds(4567), _));
EXPECT_CALL(*retry_timer_, enableTimer(std::chrono::milliseconds(25567), _));
EXPECT_CALL(*retry_timer_, enableTimer(std::chrono::milliseconds(321), _));
EXPECT_CALL(*retry_timer_, enableTimer(std::chrono::milliseconds(2321), _));
EXPECT_CALL(*retry_timer_, enableTimer(std::chrono::milliseconds(6321), _));
EXPECT_CALL(*retry_timer_, enableTimer(std::chrono::milliseconds(14321), _));
EXPECT_CALL(async_stream_, sendMessageRaw_(_, _));

// Test connection failure and retry
Expand Down