-
Notifications
You must be signed in to change notification settings - Fork 5.5k
dns_cache: tracking ttl but not yet using it #17951
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
fdf2fc5
e8e3020
e2b3815
b150b55
3ded0b7
70553bc
02e9e53
054f2ef
24c13f3
b8cd3b1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -193,14 +193,20 @@ void DnsCacheImpl::startCacheLoad(const std::string& host, uint16_t default_port | |
| return; | ||
| } | ||
|
|
||
| const auto host_attributes = Http::Utility::parseAuthority(host); | ||
|
|
||
| primary_host = createHost(host, default_port); | ||
| startResolve(host, *primary_host); | ||
| } | ||
|
|
||
|
|
||
| DnsCacheImpl::PrimaryHostInfo* DnsCacheImpl::createHost(const std::string& host, uint16_t default_port) { | ||
| const auto host_attributes = Http::Utility::parseAuthority(host); | ||
| // TODO(mattklein123): Right now, the same host with different ports will become two | ||
| // independent primary hosts with independent DNS resolutions. I'm not sure how much this will | ||
| // matter, but we could consider collapsing these down and sharing the underlying DNS resolution. | ||
| { | ||
| absl::WriterMutexLock writer_lock{&primary_hosts_lock_}; | ||
| primary_host = primary_hosts_ | ||
| return primary_hosts_ | ||
| // try_emplace() is used here for direct argument forwarding. | ||
| .try_emplace(host, std::make_unique<PrimaryHostInfo>( | ||
| *this, std::string(host_attributes.host_), | ||
|
|
@@ -210,8 +216,6 @@ void DnsCacheImpl::startCacheLoad(const std::string& host, uint16_t default_port | |
| [this, host]() { onResolveTimeout(host); })) | ||
| .first->second.get(); | ||
| } | ||
|
|
||
| startResolve(host, *primary_host); | ||
| } | ||
|
|
||
| DnsCacheImpl::PrimaryHostInfo& DnsCacheImpl::getPrimaryHost(const std::string& host) { | ||
|
|
@@ -300,9 +304,19 @@ void DnsCacheImpl::finishResolve(const std::string& host, | |
| return primary_host_it->second.get(); | ||
| }(); | ||
|
|
||
| const bool first_resolve = !primary_host_info->host_info_->firstResolveComplete(); | ||
| primary_host_info->timeout_timer_->disableTimer(); | ||
| primary_host_info->active_query_ = nullptr; | ||
| bool first_resolve = false; | ||
|
|
||
| if (!from_cache) { | ||
| first_resolve = !primary_host_info->host_info_->firstResolveComplete(); | ||
| primary_host_info->timeout_timer_->disableTimer(); | ||
| primary_host_info->active_query_ = nullptr; | ||
|
|
||
| if (status == Network::DnsResolver::ResolutionStatus::Failure) { | ||
| stats_.dns_query_failure_.inc(); | ||
| } else { | ||
| stats_.dns_query_success_.inc(); | ||
| } | ||
| } | ||
|
|
||
| // If the DNS resolver successfully resolved with an empty response list, the dns cache does not | ||
| // update. This ensures that a potentially previously resolved address does not stabilize back to | ||
|
|
@@ -312,12 +326,6 @@ void DnsCacheImpl::finishResolve(const std::string& host, | |
| primary_host_info->port_) | ||
| : nullptr; | ||
|
|
||
| if (status == Network::DnsResolver::ResolutionStatus::Failure) { | ||
| stats_.dns_query_failure_.inc(); | ||
| } else { | ||
| stats_.dns_query_success_.inc(); | ||
| } | ||
|
|
||
| // Only the change the address if: | ||
| // 1) The new address is valid && | ||
| // 2a) The host doesn't yet have an address || | ||
|
|
@@ -329,25 +337,34 @@ void DnsCacheImpl::finishResolve(const std::string& host, | |
| auto current_address = primary_host_info->host_info_->address(); | ||
| if (new_address != nullptr && (current_address == nullptr || *current_address != *new_address)) { | ||
| if (!from_cache) { | ||
| addCacheEntry(host, new_address); | ||
| addCacheEntry(host, new_address, response.front().ttl_); | ||
| } | ||
| // TODO(alyssawilk) don't immediately push cached entries to threads. | ||
| // Only serve stale entries if a configured resolve timeout has fired. | ||
| ENVOY_LOG(debug, "host '{}' address has changed", host); | ||
| primary_host_info->host_info_->setAddress(new_address); | ||
| runAddUpdateCallbacks(host, primary_host_info->host_info_); | ||
| address_changed = true; | ||
| stats_.host_address_changed_.inc(); | ||
| } | ||
|
|
||
| if (first_resolve || address_changed) { | ||
| SystemTime now = main_thread_dispatcher_.timeSource().systemTime(); | ||
| uint64_t ms_since_epoch = | ||
| std::chrono::duration_cast<std::chrono::milliseconds>(now.time_since_epoch()).count(); | ||
| uint64_t stale_at = ms_since_epoch + std::chrono::duration_cast<std::chrono::seconds>(response.front().ttl_).count(); | ||
| primary_host_info->host_info_->updateStale(stale_at); | ||
|
|
||
| if (first_resolve) { | ||
| primary_host_info->host_info_->setFirstResolveComplete(); | ||
| } | ||
| if (first_resolve || address_changed) { | ||
| // TODO(alyssawilk) only notify threads of stale results after a resolution | ||
| // timeout. | ||
| notifyThreads(host, primary_host_info->host_info_); | ||
| } | ||
|
|
||
| // Kick off the refresh timer. | ||
| // TODO(mattklein123): Consider jitter here. It may not be necessary since the initial host | ||
| // is populated dynamically. | ||
| // TODO(alyssawilk) also consider TTL here. | ||
| if (status == Network::DnsResolver::ResolutionStatus::Success) { | ||
| failure_backoff_strategy_->reset(); | ||
| primary_host_info->refresh_timer_->enableTimer(refresh_interval_); | ||
|
|
@@ -424,12 +441,16 @@ DnsCacheImpl::PrimaryHostInfo::~PrimaryHostInfo() { | |
| } | ||
|
|
||
| void DnsCacheImpl::addCacheEntry(const std::string& host, | ||
| const Network::Address::InstanceConstSharedPtr& address) { | ||
| const Network::Address::InstanceConstSharedPtr& address, | ||
| const std::chrono::seconds& ttl) { | ||
| if (!key_value_store_) { | ||
| return; | ||
| } | ||
| // TODO(alyssawilk) cache data should include TTL, or some other indicator. | ||
| const std::string value = absl::StrCat(address->asString()); | ||
| SystemTime now = main_thread_dispatcher_.timeSource().systemTime(); | ||
| uint64_t ms_since_epoch = | ||
| std::chrono::duration_cast<std::chrono::milliseconds>(now.time_since_epoch()).count(); | ||
| const std::string value = absl::StrCat(address->asString(), "|", ttl.count(), "|", | ||
| ms_since_epoch); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not for this PR, but it seems like we could add a wrapper around KeyValueStore in which the Value is allowed to be a type which implements an interfaces that has methods like /shrug. Maybe something to think about down the road... |
||
| key_value_store_->addOrUpdate(host, value); | ||
| } | ||
|
|
||
|
|
@@ -450,17 +471,31 @@ void DnsCacheImpl::loadCacheEntries( | |
| key_value_store_ = factory.createStore(config.key_value_config(), validation_visitor_, | ||
| main_thread_dispatcher_, file_system_); | ||
| KeyValueStore::ConstIterateCb load = [this](const std::string& key, const std::string& value) { | ||
| auto address = Network::Utility::parseInternetAddressAndPortNoThrow(value); | ||
| if (address == nullptr) { | ||
| Network::Address::InstanceConstSharedPtr address; | ||
| const auto parts = StringUtil::splitToken(value, "|"); | ||
| std::chrono::seconds ttl(0); | ||
| if (parts.size() == 3) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I might be misreading this code but it seems like up on line 452, we write 3 things into the cache value. Down here, we spilt on | and ensure that we have 2 parts. This all makes sense. But I only see code looking at parts[0] (line 478) and parts[1] (line 483) but I don't see a reference to parts[2]. Is this expected?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah I wasn't using part two yet. I was going to do in the PR where we start using stale, but I've worked it into this one with the time updates. |
||
| address = Network::Utility::parseInternetAddressAndPortNoThrow(std::string(parts[0])); | ||
| if (address == nullptr) { | ||
| ENVOY_LOG(warn, "{} is not a valid address", parts[0]); | ||
| } | ||
| uint64_t ttl_int; | ||
| if (absl::SimpleAtoi(parts[1], &ttl_int) && ttl_int != 0) { | ||
| ttl = std::chrono::seconds(ttl_int); | ||
| } else { | ||
| ENVOY_LOG(warn, "{} is not a valid ttl", parts[1]); | ||
| } | ||
| } else { | ||
| ENVOY_LOG(warn, "Incorrect number of tokens in the cache line"); | ||
| } | ||
| if (address == nullptr || ttl == std::chrono::seconds(0)) { | ||
| ENVOY_LOG(warn, "Unable to parse cache line '{}'", value); | ||
| return KeyValueStore::Iterate::Break; | ||
| } | ||
| stats_.cache_load_.inc(); | ||
| std::list<Network::DnsResponse> response; | ||
| // TODO(alyssawilk) change finishResolve to actually use the TTL rather than | ||
| // putting 0 here, return the remaining TTL or indicate the result is stale. | ||
| response.emplace_back(Network::DnsResponse(address, std::chrono::seconds(0) /* ttl */)); | ||
| startCacheLoad(key, address->ip()->port()); | ||
| createHost(key, address->ip()->port()); | ||
| response.emplace_back(Network::DnsResponse(address, ttl)); | ||
| finishResolve(key, Network::DnsResolver::ResolutionStatus::Success, std::move(response), true); | ||
| return KeyValueStore::Iterate::Continue; | ||
| }; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -112,6 +112,7 @@ class DnsCacheImpl : public DnsCache, Logger::Loggable<Logger::Id::forward_proxy | |
| const std::string& resolvedHost() const override { return resolved_host_; } | ||
| bool isIpAddress() const override { return is_ip_address_; } | ||
| void touch() final { last_used_time_ = time_source_.monotonicTime().time_since_epoch(); } | ||
| void updateStale(uint64_t time) { stale_at_time_ = time; } | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is there by any chance a data type we can use here which expresses the semantics here? For example is this a time_t (seconds since the UNIX epoch) or is it more of a TTL (a time delta)? Perhaps one of the std::chrono types? Oh, or maybe just SystemTime directly?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
|
|
||
| void setAddress(Network::Address::InstanceConstSharedPtr address) { | ||
| absl::WriterMutexLock lock{&resolve_lock_}; | ||
|
|
@@ -141,6 +142,7 @@ class DnsCacheImpl : public DnsCache, Logger::Loggable<Logger::Id::forward_proxy | |
| // Using std::chrono::steady_clock::duration is required for compilation within an atomic vs. | ||
| // using MonotonicTime. | ||
| std::atomic<std::chrono::steady_clock::duration> last_used_time_; | ||
| std::atomic<uint64_t> stale_at_time_; | ||
| bool first_resolve_complete_ ABSL_GUARDED_BY(resolve_lock_){false}; | ||
| }; | ||
|
|
||
|
|
@@ -186,10 +188,12 @@ class DnsCacheImpl : public DnsCache, Logger::Loggable<Logger::Id::forward_proxy | |
| PrimaryHostInfo& getPrimaryHost(const std::string& host); | ||
|
|
||
| void addCacheEntry(const std::string& host, | ||
| const Network::Address::InstanceConstSharedPtr& address); | ||
| const Network::Address::InstanceConstSharedPtr& address, | ||
| const std::chrono::seconds& ttl); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think normally we'd pass this by value since it's just a wrapper around an integer |
||
| void removeCacheEntry(const std::string& host); | ||
| void loadCacheEntries( | ||
| const envoy::extensions::common::dynamic_forward_proxy::v3::DnsCacheConfig& config); | ||
| PrimaryHostInfo* createHost(const std::string& host, uint16_t default_port); | ||
|
|
||
| Event::Dispatcher& main_thread_dispatcher_; | ||
| const Network::DnsLookupFamily dns_lookup_family_; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -19,6 +19,7 @@ | |
| #include "test/test_common/test_runtime.h" | ||
| #include "test/test_common/utility.h" | ||
|
|
||
| using testing::ContainsRegex; | ||
| using testing::DoAll; | ||
| using testing::InSequence; | ||
| using testing::Return; | ||
|
|
@@ -1064,14 +1065,14 @@ TEST_F(DnsCacheImplTest, ResolveSuccessWithCaching) { | |
|
|
||
| EXPECT_CALL(*timeout_timer, disableTimer()); | ||
| // Make sure the store gets the first insert. | ||
| EXPECT_CALL(*store, addOrUpdate("foo.com", "10.0.0.1:80")); | ||
| EXPECT_CALL(*store, addOrUpdate("foo.com", ContainsRegex("10.0.0.1:80|30|\\d*)"))); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Instead of using a Regex, could we use the actual value? Maybe that's hard cause we're using the "real" clock and not a mock clock? (If so, would it be possible to use a mock clock? Probably!)
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We can now! |
||
| EXPECT_CALL(update_callbacks_, | ||
| onDnsHostAddOrUpdate("foo.com", DnsHostInfoEquals("10.0.0.1:80", "foo.com", false))); | ||
| EXPECT_CALL(callbacks, | ||
| onLoadDnsCacheComplete(DnsHostInfoEquals("10.0.0.1:80", "foo.com", false))); | ||
| EXPECT_CALL(*resolve_timer, enableTimer(std::chrono::milliseconds(60000), _)); | ||
| resolve_cb(Network::DnsResolver::ResolutionStatus::Success, | ||
| TestUtility::makeDnsResponse({"10.0.0.1"})); | ||
| TestUtility::makeDnsResponse({"10.0.0.1"}, std::chrono::seconds(30))); | ||
|
|
||
| checkStats(1 /* attempt */, 1 /* success */, 0 /* failure */, 1 /* address changed */, | ||
| 1 /* added */, 0 /* removed */, 1 /* num hosts */); | ||
|
|
@@ -1089,7 +1090,7 @@ TEST_F(DnsCacheImplTest, ResolveSuccessWithCaching) { | |
| EXPECT_CALL(*timeout_timer, disableTimer()); | ||
| EXPECT_CALL(*resolve_timer, enableTimer(std::chrono::milliseconds(60000), _)); | ||
| resolve_cb(Network::DnsResolver::ResolutionStatus::Success, | ||
| TestUtility::makeDnsResponse({"10.0.0.1"})); | ||
| TestUtility::makeDnsResponse({"10.0.0.1"}, std::chrono::seconds(30))); | ||
|
|
||
| checkStats(2 /* attempt */, 2 /* success */, 0 /* failure */, 1 /* address changed */, | ||
| 1 /* added */, 0 /* removed */, 1 /* num hosts */); | ||
|
|
@@ -1105,12 +1106,12 @@ TEST_F(DnsCacheImplTest, ResolveSuccessWithCaching) { | |
|
|
||
| EXPECT_CALL(*timeout_timer, disableTimer()); | ||
| // Make sure the store gets the updated address. | ||
| EXPECT_CALL(*store, addOrUpdate("foo.com", "10.0.0.2:80")); | ||
| EXPECT_CALL(*store, addOrUpdate("foo.com", ContainsRegex("10.0.0.2:80|30|\\d*"))); | ||
| EXPECT_CALL(update_callbacks_, | ||
| onDnsHostAddOrUpdate("foo.com", DnsHostInfoEquals("10.0.0.2:80", "foo.com", false))); | ||
| EXPECT_CALL(*resolve_timer, enableTimer(std::chrono::milliseconds(60000), _)); | ||
| resolve_cb(Network::DnsResolver::ResolutionStatus::Success, | ||
| TestUtility::makeDnsResponse({"10.0.0.2"})); | ||
| TestUtility::makeDnsResponse({"10.0.0.2"}, std::chrono::seconds(30))); | ||
|
|
||
| checkStats(3 /* attempt */, 3 /* success */, 0 /* failure */, 2 /* address changed */, | ||
| 1 /* added */, 0 /* removed */, 1 /* num hosts */); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -122,7 +122,8 @@ name: envoy.clusters.dynamic_forward_proxy | |
| if (write_cache_file_) { | ||
| std::string host = | ||
| fmt::format("localhost:{}", fake_upstreams_[0]->localAddress()->ip()->port()); | ||
| std::string value = fake_upstreams_[0]->localAddress()->asString(); | ||
| std::string value = absl::StrCat( | ||
| fake_upstreams_[0]->localAddress()->asString(), "|1000000|0"); | ||
| TestEnvironment::writeStringToFileForTest( | ||
| "dns_cache.txt", absl::StrCat(host.length(), "\n", host, value.length(), "\n", value)); | ||
| } | ||
|
|
@@ -359,7 +360,6 @@ TEST_P(ProxyFilterIntegrationTest, UseCacheFile) { | |
| sendRequestAndWaitForResponse(request_headers, 1024, default_response_headers_, 1024); | ||
| checkSimpleRequestSuccess(1024, 1024, response.get()); | ||
| EXPECT_EQ(1, test_server_->counter("dns_cache.foo.cache_load")->value()); | ||
| EXPECT_EQ(1, test_server_->counter("dns_cache.foo.dns_query_attempt")->value()); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would it be valid to check that this is 0 since the cached entry won't be re-queried?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it might race becasue it reregisters for refresh and I'm not sure how long it'll take |
||
| EXPECT_EQ(1, test_server_->counter("dns_cache.foo.host_added")->value()); | ||
| } | ||
| #endif | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: It might make sense for this method to take the ttl directly and do the work in the method of coverting the ttl to whatever the internal representation is of "now + ttl"?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done.