Improve route status reconcile integration tests (#11456)
The route status reconcile integration tests are flaky and often spuriously fail in CI. The failing assertion expects the `Accepted` status to be true after reconciliation, but it is observed to be false.

I have not been able to reproduce this failure locally, so it is hard to determine what causes it. However, I noticed that these tests use a loop without any backoff, repeatedly requesting the route from the Kubernetes API until the status's modified timestamp changes. This hot loop issues a very large number of Kubernetes API requests very quickly. It's not clear whether these excessive requests could overload the Kubernetes API and cause the flaky failures, but it seems worth fixing regardless.
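
For illustration, the shape of the loop in question (a simplified sketch based on the test code removed in this diff, with placeholder route and server names):

```rust
// Sketch of the old pattern: poll the route status from the Kubernetes API
// in a tight loop, with no backoff, until the condition's timestamp changes.
let cond = tokio::time::timeout(tokio::time::Duration::from_secs(60), async move {
    loop {
        // Each iteration issues fresh API requests immediately after the last.
        let cond = find_route_condition(
            await_route_status(&client, &ns, "example-route").await.parents,
            "example-server",
        )
        .expect("must have at least one 'Accepted' condition");
        if &cond.last_transition_time > create_timestamp {
            return cond;
        }
    }
})
.await
.expect("Timed-out waiting for HTTPRoute status update");
```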

We replace the hot loop with `await_condition`, taking advantage of a watch on the route instead of repeatedly polling it. This results in far fewer API calls.
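
A minimal sketch of the replacement pattern, using the `await_condition` helper from `policy-test` as it appears in the diff below (the route name and closure body are illustrative):

```rust
// Sketch of the new pattern: `await_condition` sets up a watch on the route
// and re-evaluates the closure on each watch event, so the test issues far
// fewer API requests than a polling loop.
let route = await_condition(
    &client,
    &ns,
    "example-route",
    |obj: Option<&k8s::policy::httproute::HttpRoute>| -> bool {
        // Resolve once the controller has populated a status on the route.
        obj.and_then(|route| route.status.as_ref()).is_some()
    },
)
.await
.expect("must fetch route");
```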

We also log the status each time we retrieve it. This will help us understand why these tests fail, should they fail again in the future.
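
Concretely, `await_route_status` now emits a trace event like the one below each time the status is fetched (taken from the diff below):

```rust
// Log the retrieved status so CI traces show what the controller wrote.
tracing::trace!(?route_status, name, ns, "got route status");
```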

Signed-off-by: Alex Leong <[email protected]>
adleong authored Oct 23, 2023
1 parent 8cd165a commit 225ce93
Showing 3 changed files with 113 additions and 123 deletions.
16 changes: 9 additions & 7 deletions policy-test/src/lib.rs
@@ -158,14 +158,16 @@ pub async fn await_route_status(
name: &str,
) -> k8s::policy::httproute::RouteStatus {
use k8s::policy::httproute as api;
await_condition(client, ns, name, |obj: Option<&api::HttpRoute>| -> bool {
let route_status = await_condition(client, ns, name, |obj: Option<&api::HttpRoute>| -> bool {
obj.and_then(|route| route.status.as_ref()).is_some()
})
.await
.expect("must fetch route")
.status
.expect("route must contain a status representation")
.inner
.inner;
tracing::trace!(?route_status, name, ns, "got route status");
route_status
}

// Wait for the endpoints controller to populate the Endpoints resource.
@@ -286,16 +288,16 @@ pub fn mk_route(
}
}

pub fn find_route_condition(
statuses: impl IntoIterator<Item = k8s_gateway_api::RouteParentStatus>,
parent_name: &str,
) -> Option<k8s::Condition> {
pub fn find_route_condition<'a>(
statuses: impl IntoIterator<Item = &'a k8s_gateway_api::RouteParentStatus>,
parent_name: &'static str,
) -> Option<&'a k8s::Condition> {
statuses
.into_iter()
.find(|route_status| route_status.parent_ref.name == parent_name)
.expect("route must have at least one status set")
.conditions
.into_iter()
.iter()
.find(|cond| cond.type_ == "Accepted")
}

197 changes: 94 additions & 103 deletions policy-test/tests/inbound_http_route_status.rs
@@ -1,24 +1,22 @@
use kube::ResourceExt;
use linkerd_policy_controller_k8s_api as k8s;
use linkerd_policy_test::{
await_route_status, create, find_route_condition, mk_route, with_temp_ns,
await_condition, await_route_status, create, find_route_condition, mk_route, with_temp_ns,
};

#[tokio::test(flavor = "current_thread")]
async fn inbound_accepted_parent() {
with_temp_ns(|client, ns| async move {
// Create a test 'Server'
let server_name = "test-accepted-server";
let server = k8s::policy::Server {
metadata: k8s::ObjectMeta {
namespace: Some(ns.to_string()),
name: Some("test-accepted-server".to_string()),
name: Some(server_name.to_string()),
..Default::default()
},
spec: k8s::policy::ServerSpec {
pod_selector: k8s::labels::Selector::from_iter(Some((
"app",
"test-accepted-server",
))),
pod_selector: k8s::labels::Selector::from_iter(Some(("app", server_name))),
port: k8s::policy::server::Port::Name("http".to_string()),
proxy_protocol: Some(k8s::policy::server::ProxyProtocol::Http1),
},
@@ -43,7 +41,7 @@ async fn inbound_accepted_parent() {
let route_status = statuses
.clone()
.into_iter()
.find(|route_status| route_status.parent_ref.name == server.name_unchecked())
.find(|route_status| route_status.parent_ref.name == server_name)
.expect("must have at least one parent status");

// Check status references to parent we have created
@@ -54,7 +52,7 @@ async fn inbound_accepted_parent() {
assert_eq!(route_status.parent_ref.kind.as_deref(), Some("Server"));

// Check status is accepted with a status of 'True'
let cond = find_route_condition(statuses, &server.name_unchecked())
let cond = find_route_condition(&statuses, server_name)
.expect("must have at least one 'Accepted' condition for accepted server");
assert_eq!(cond.status, "True");
assert_eq!(cond.reason, "Accepted")
@@ -112,14 +110,14 @@ async fn inbound_multiple_parents() {
.parents;

// Find status for invalid parent and extract the condition
let invalid_cond = find_route_condition(parent_status.clone(), "test-invalid-server")
let invalid_cond = find_route_condition(&parent_status, "test-invalid-server")
.expect("must have at least one 'Accepted' condition set for invalid parent");
// Route shouldn't be accepted
assert_eq!(invalid_cond.status, "False");
assert_eq!(invalid_cond.reason, "NoMatchingParent");

// Find status for valid parent and extract the condition
let valid_cond = find_route_condition(parent_status, "test-valid-server")
let valid_cond = find_route_condition(&parent_status, "test-valid-server")
.expect("must have at least one 'Accepted' condition set for valid parent");
assert_eq!(valid_cond.status, "True");
assert_eq!(valid_cond.reason, "Accepted")
@@ -154,11 +152,12 @@ async fn inbound_accepted_reconcile_no_parent() {
with_temp_ns(|client, ns| async move {
// Given a route with a nonexistent parentReference, we expect to have an
// 'Accepted' condition with 'False' as a status.
let server_name = "test-reconcile-inbound-server";
let srv_ref = vec![k8s::policy::httproute::ParentReference {
group: Some("policy.linkerd.io".to_string()),
kind: Some("Server".to_string()),
namespace: Some(ns.clone()),
name: "test-reconcile-inbound-server".to_string(),
name: server_name.to_string(),
section_name: None,
port: None,
}];
@@ -167,13 +166,9 @@ async fn inbound_accepted_reconcile_no_parent() {
mk_route(&ns, "test-reconcile-inbound-route", Some(srv_ref)),
)
.await;
let cond = find_route_condition(
await_route_status(&client, &ns, "test-reconcile-inbound-route")
.await
.parents,
"test-reconcile-inbound-server",
)
.expect("must have at least one 'Accepted' condition set for parent");
let route_status = await_route_status(&client, &ns, "test-reconcile-inbound-route").await;
let cond = find_route_condition(&route_status.parents, server_name)
.expect("must have at least one 'Accepted' condition set for parent");
// Test when parent ref does not exist we get Accepted { False }.
assert_eq!(cond.status, "False");
assert_eq!(cond.reason, "NoMatchingParent");
@@ -183,48 +178,48 @@ async fn inbound_accepted_reconcile_no_parent() {

// Create the 'Server' that route references and expect it to be picked up
// by the index. Consequently, route will have its status reconciled.
let server = {
let server = k8s::policy::Server {
metadata: k8s::ObjectMeta {
namespace: Some(ns.to_string()),
name: Some("test-reconcile-inbound-server".to_string()),
..Default::default()
},
spec: k8s::policy::ServerSpec {
pod_selector: k8s::labels::Selector::from_iter(Some((
"app",
"test-reconcile-inbound-server",
))),
port: k8s::policy::server::Port::Name("http".to_string()),
proxy_protocol: Some(k8s::policy::server::ProxyProtocol::Http1),
},
};
create(&client, server).await
let server = k8s::policy::Server {
metadata: k8s::ObjectMeta {
namespace: Some(ns.to_string()),
name: Some(server_name.to_string()),
..Default::default()
},
spec: k8s::policy::ServerSpec {
pod_selector: k8s::labels::Selector::from_iter(Some(("app", server_name))),
port: k8s::policy::server::Port::Name("http".to_string()),
proxy_protocol: Some(k8s::policy::server::ProxyProtocol::Http1),
},
};
create(&client, server).await;

// HTTPRoute may not be patched instantly, await the condition where
// create_timestamp and observed_timestamp are different.
let route_status = await_condition(
&client,
&ns,
"test-reconcile-inbound-route",
|obj: Option<&k8s::policy::httproute::HttpRoute>| -> bool {
tracing::trace!(?obj, "got route status");
let status = match obj.and_then(|route| route.status.as_ref()) {
Some(status) => status,
None => return false,
};
let cond = match find_route_condition(&status.inner.parents, server_name) {
Some(cond) => cond,
None => return false,
};
&cond.last_transition_time > create_timestamp
},
)
.await
.expect("must fetch route")
.status
.expect("route must contain a status representation");

// HTTPRoute may not be patched instantly, wrap with a timeout and loop
// until create_timestamp and observed_timestamp are different.
let cond = tokio::time::timeout(tokio::time::Duration::from_secs(60), async move {
loop {
let cond = find_route_condition(
await_route_status(&client, &ns, "test-reconcile-inbound-route")
.await
.parents,
&server.name_unchecked(),
)
.expect("must have least one 'Accepted' condition");

// Observe condition's current timestamp. If it differs from the
// previously recorded timestamp, then it means the underlying
// condition has been updated so we can check the message and
// status.
if &cond.last_transition_time > create_timestamp {
return cond;
}
}
})
.await
.expect("Timed-out waiting for HTTPRoute status update");
let cond = find_route_condition(&route_status.inner.parents, server_name)
.expect("route must have a route condition");
assert_eq!(cond.status, "True");
assert_eq!(cond.reason, "Accepted");
})
@@ -236,30 +231,27 @@ async fn inbound_accepted_reconcile_parent_delete() {
with_temp_ns(|client, ns| async move {
// Attach a route to a Server and expect the route to be patched with an
// Accepted status.
let server = {
let server = k8s::policy::Server {
metadata: k8s::ObjectMeta {
namespace: Some(ns.to_string()),
name: Some("test-reconcile-delete-server".to_string()),
..Default::default()
},
spec: k8s::policy::ServerSpec {
pod_selector: k8s::labels::Selector::from_iter(Some((
"app",
"test-reconcile-delete-server",
))),
port: k8s::policy::server::Port::Name("http".to_string()),
proxy_protocol: Some(k8s::policy::server::ProxyProtocol::Http1),
},
};
create(&client, server).await
let server_name = "test-reconcile-delete-server";
let server = k8s::policy::Server {
metadata: k8s::ObjectMeta {
namespace: Some(ns.to_string()),
name: Some(server_name.to_string()),
..Default::default()
},
spec: k8s::policy::ServerSpec {
pod_selector: k8s::labels::Selector::from_iter(Some(("app", server_name))),
port: k8s::policy::server::Port::Name("http".to_string()),
proxy_protocol: Some(k8s::policy::server::ProxyProtocol::Http1),
},
};
create(&client, server).await;

// Create parentReference and route
let srv_ref = vec![k8s::policy::httproute::ParentReference {
group: Some("policy.linkerd.io".to_string()),
kind: Some("Server".to_string()),
namespace: Some(ns.clone()),
name: "test-reconcile-delete-server".to_string(),
name: server_name.to_string(),
section_name: None,
port: None,
}];
@@ -268,13 +260,9 @@ async fn inbound_accepted_reconcile_parent_delete() {
mk_route(&ns, "test-reconcile-delete-route", Some(srv_ref)),
)
.await;
let cond = find_route_condition(
await_route_status(&client, &ns, "test-reconcile-delete-route")
.await
.parents,
&server.name_unchecked(),
)
.expect("must have at least one 'Accepted' condition");
let route_status = await_route_status(&client, &ns, "test-reconcile-delete-route").await;
let cond = find_route_condition(&route_status.parents, server_name)
.expect("must have at least one 'Accepted' condition");
assert_eq!(cond.status, "True");
assert_eq!(cond.reason, "Accepted");

@@ -290,29 +278,32 @@ async fn inbound_accepted_reconcile_parent_delete() {
.await
.expect("API delete request failed");

// HTTPRoute may not be patched instantly, wrap with a timeout and loop
// until create_timestamp and observed_timestamp are different.
let cond = tokio::time::timeout(tokio::time::Duration::from_secs(60), async move {
loop {
let cond = find_route_condition(
await_route_status(&client, &ns, "test-reconcile-delete-route")
.await
.parents,
&server.name_unchecked(),
)
.expect("must have at least one 'Accepted' condition");

// Observe condition's current timestamp. If it differs from the
// previously recorded timestamp, then it means the underlying
// condition has been updated so we can check the message and
// status.
if &cond.last_transition_time > create_timestamp {
return cond;
}
}
})
// HTTPRoute may not be patched instantly, await the condition where
// create_timestamp and observed_timestamp are different.
let route_status = await_condition(
&client,
&ns,
"test-reconcile-delete-route",
|obj: Option<&k8s::policy::httproute::HttpRoute>| -> bool {
tracing::trace!(?obj, "got route status");
let status = match obj.and_then(|route| route.status.as_ref()) {
Some(status) => status,
None => return false,
};
let cond = match find_route_condition(&status.inner.parents, server_name) {
Some(cond) => cond,
None => return false,
};
&cond.last_transition_time > create_timestamp
},
)
.await
.expect("Timed-out waiting for HTTPRoute status update");
.expect("must fetch route")
.status
.expect("route must contain a status representation");

let cond = find_route_condition(&route_status.inner.parents, server_name)
.expect("route must have a route condition");
assert_eq!(cond.status, "False");
assert_eq!(cond.reason, "NoMatchingParent");
})