From 225ce93c0fd2ceecc7b409f530be4869e42fb0d5 Mon Sep 17 00:00:00 2001 From: Alex Leong Date: Mon, 23 Oct 2023 15:57:30 -0700 Subject: [PATCH] Improve route status reconcile integration tests (#11456) The route status reconcile integration tests are flaky and often spuriously fail in CI. The assertion which fails is that the `Accepted` status should be true after reconciliation but it is observed to be false. I have not been able to reproduce this failure locally so it is hard to determine what causes these failures. However, I noticed that these tests utilize a loop without any backoff and repeatedly request a route from the kubernetes API until the status modified timestamp is updated. This hot loop results in a very large number of kubernetes API requests very quickly. It's not clear if these excessive requests could cause the kubernetes API to become overloaded and result in the flaky failures, but it seems like something that's worth fixing regardless. We replace the hot loop with a usage of `await_condition` so that we take advantage of a watch on the route instead of repeatedly polling it. This results is far fewer API calls. We also add some logging of the status each time we retrieve it. This will be helpful in understanding why these tests fail, if they fail again in the future. Signed-off-by: Alex Leong --- policy-test/src/lib.rs | 16 +- .../tests/inbound_http_route_status.rs | 197 +++++++++--------- .../tests/outbound_http_route_status.rs | 23 +- 3 files changed, 113 insertions(+), 123 deletions(-) diff --git a/policy-test/src/lib.rs b/policy-test/src/lib.rs index e677c077c5b24..91f173a911705 100644 --- a/policy-test/src/lib.rs +++ b/policy-test/src/lib.rs @@ -158,14 +158,16 @@ pub async fn await_route_status( name: &str, ) -> k8s::policy::httproute::RouteStatus { use k8s::policy::httproute as api; - await_condition(client, ns, name, |obj: Option<&api::HttpRoute>| -> bool { + let route_status = await_condition(client, ns, name, |obj: Option<&api::HttpRoute>| -> bool { obj.and_then(|route| route.status.as_ref()).is_some() }) .await .expect("must fetch route") .status .expect("route must contain a status representation") - .inner + .inner; + tracing::trace!(?route_status, name, ns, "got route status"); + route_status } // Wait for the endpoints controller to populate the Endpoints resource. @@ -286,16 +288,16 @@ pub fn mk_route( } } -pub fn find_route_condition( - statuses: impl IntoIterator, - parent_name: &str, -) -> Option { +pub fn find_route_condition<'a>( + statuses: impl IntoIterator, + parent_name: &'static str, +) -> Option<&'a k8s::Condition> { statuses .into_iter() .find(|route_status| route_status.parent_ref.name == parent_name) .expect("route must have at least one status set") .conditions - .into_iter() + .iter() .find(|cond| cond.type_ == "Accepted") } diff --git a/policy-test/tests/inbound_http_route_status.rs b/policy-test/tests/inbound_http_route_status.rs index dc3ca97fb702b..d79bf220d7af2 100644 --- a/policy-test/tests/inbound_http_route_status.rs +++ b/policy-test/tests/inbound_http_route_status.rs @@ -1,24 +1,22 @@ use kube::ResourceExt; use linkerd_policy_controller_k8s_api as k8s; use linkerd_policy_test::{ - await_route_status, create, find_route_condition, mk_route, with_temp_ns, + await_condition, await_route_status, create, find_route_condition, mk_route, with_temp_ns, }; #[tokio::test(flavor = "current_thread")] async fn inbound_accepted_parent() { with_temp_ns(|client, ns| async move { // Create a test 'Server' + let server_name = "test-accepted-server"; let server = k8s::policy::Server { metadata: k8s::ObjectMeta { namespace: Some(ns.to_string()), - name: Some("test-accepted-server".to_string()), + name: Some(server_name.to_string()), ..Default::default() }, spec: k8s::policy::ServerSpec { - pod_selector: k8s::labels::Selector::from_iter(Some(( - "app", - "test-accepted-server", - ))), + pod_selector: k8s::labels::Selector::from_iter(Some(("app", server_name))), port: k8s::policy::server::Port::Name("http".to_string()), proxy_protocol: Some(k8s::policy::server::ProxyProtocol::Http1), }, @@ -43,7 +41,7 @@ async fn inbound_accepted_parent() { let route_status = statuses .clone() .into_iter() - .find(|route_status| route_status.parent_ref.name == server.name_unchecked()) + .find(|route_status| route_status.parent_ref.name == server_name) .expect("must have at least one parent status"); // Check status references to parent we have created @@ -54,7 +52,7 @@ async fn inbound_accepted_parent() { assert_eq!(route_status.parent_ref.kind.as_deref(), Some("Server")); // Check status is accepted with a status of 'True' - let cond = find_route_condition(statuses, &server.name_unchecked()) + let cond = find_route_condition(&statuses, server_name) .expect("must have at least one 'Accepted' condition for accepted server"); assert_eq!(cond.status, "True"); assert_eq!(cond.reason, "Accepted") @@ -112,14 +110,14 @@ async fn inbound_multiple_parents() { .parents; // Find status for invalid parent and extract the condition - let invalid_cond = find_route_condition(parent_status.clone(), "test-invalid-server") + let invalid_cond = find_route_condition(&parent_status, "test-invalid-server") .expect("must have at least one 'Accepted' condition set for invalid parent"); // Route shouldn't be accepted assert_eq!(invalid_cond.status, "False"); assert_eq!(invalid_cond.reason, "NoMatchingParent"); // Find status for valid parent and extract the condition - let valid_cond = find_route_condition(parent_status, "test-valid-server") + let valid_cond = find_route_condition(&parent_status, "test-valid-server") .expect("must have at least one 'Accepted' condition set for valid parent"); assert_eq!(valid_cond.status, "True"); assert_eq!(valid_cond.reason, "Accepted") @@ -154,11 +152,12 @@ async fn inbound_accepted_reconcile_no_parent() { with_temp_ns(|client, ns| async move { // Given a route with a nonexistent parentReference, we expect to have an // 'Accepted' condition with 'False' as a status. + let server_name = "test-reconcile-inbound-server"; let srv_ref = vec![k8s::policy::httproute::ParentReference { group: Some("policy.linkerd.io".to_string()), kind: Some("Server".to_string()), namespace: Some(ns.clone()), - name: "test-reconcile-inbound-server".to_string(), + name: server_name.to_string(), section_name: None, port: None, }]; @@ -167,13 +166,9 @@ async fn inbound_accepted_reconcile_no_parent() { mk_route(&ns, "test-reconcile-inbound-route", Some(srv_ref)), ) .await; - let cond = find_route_condition( - await_route_status(&client, &ns, "test-reconcile-inbound-route") - .await - .parents, - "test-reconcile-inbound-server", - ) - .expect("must have at least one 'Accepted' condition set for parent"); + let route_status = await_route_status(&client, &ns, "test-reconcile-inbound-route").await; + let cond = find_route_condition(&route_status.parents, server_name) + .expect("must have at least one 'Accepted' condition set for parent"); // Test when parent ref does not exist we get Accepted { False }. assert_eq!(cond.status, "False"); assert_eq!(cond.reason, "NoMatchingParent"); @@ -183,48 +178,48 @@ async fn inbound_accepted_reconcile_no_parent() { // Create the 'Server' that route references and expect it to be picked up // by the index. Consequently, route will have its status reconciled. - let server = { - let server = k8s::policy::Server { - metadata: k8s::ObjectMeta { - namespace: Some(ns.to_string()), - name: Some("test-reconcile-inbound-server".to_string()), - ..Default::default() - }, - spec: k8s::policy::ServerSpec { - pod_selector: k8s::labels::Selector::from_iter(Some(( - "app", - "test-reconcile-inbound-server", - ))), - port: k8s::policy::server::Port::Name("http".to_string()), - proxy_protocol: Some(k8s::policy::server::ProxyProtocol::Http1), - }, - }; - create(&client, server).await + let server = k8s::policy::Server { + metadata: k8s::ObjectMeta { + namespace: Some(ns.to_string()), + name: Some(server_name.to_string()), + ..Default::default() + }, + spec: k8s::policy::ServerSpec { + pod_selector: k8s::labels::Selector::from_iter(Some(("app", server_name))), + port: k8s::policy::server::Port::Name("http".to_string()), + proxy_protocol: Some(k8s::policy::server::ProxyProtocol::Http1), + }, }; + create(&client, server).await; + + // HTTPRoute may not be patched instantly, await the condition where + // create_timestamp and observed_timestamp are different. + let route_status = await_condition( + &client, + &ns, + "test-reconcile-inbound-route", + |obj: Option<&k8s::policy::httproute::HttpRoute>| -> bool { + tracing::trace!(?obj, "got route status"); + let status = match obj.and_then(|route| route.status.as_ref()) { + Some(status) => status, + None => return false, + }; + let cond = match find_route_condition(&status.inner.parents, server_name) { + Some(cond) => cond, + None => return false, + }; + &cond.last_transition_time > create_timestamp + }, + ) + .await + .expect("must fetch route") + .status + .expect("route must contain a status representation"); // HTTPRoute may not be patched instantly, wrap with a timeout and loop // until create_timestamp and observed_timestamp are different. - let cond = tokio::time::timeout(tokio::time::Duration::from_secs(60), async move { - loop { - let cond = find_route_condition( - await_route_status(&client, &ns, "test-reconcile-inbound-route") - .await - .parents, - &server.name_unchecked(), - ) - .expect("must have least one 'Accepted' condition"); - - // Observe condition's current timestamp. If it differs from the - // previously recorded timestamp, then it means the underlying - // condition has been updated so we can check the message and - // status. - if &cond.last_transition_time > create_timestamp { - return cond; - } - } - }) - .await - .expect("Timed-out waiting for HTTPRoute status update"); + let cond = find_route_condition(&route_status.inner.parents, server_name) + .expect("route must have a route condition"); assert_eq!(cond.status, "True"); assert_eq!(cond.reason, "Accepted"); }) @@ -236,30 +231,27 @@ async fn inbound_accepted_reconcile_parent_delete() { with_temp_ns(|client, ns| async move { // Attach a route to a Server and expect the route to be patched with an // Accepted status. - let server = { - let server = k8s::policy::Server { - metadata: k8s::ObjectMeta { - namespace: Some(ns.to_string()), - name: Some("test-reconcile-delete-server".to_string()), - ..Default::default() - }, - spec: k8s::policy::ServerSpec { - pod_selector: k8s::labels::Selector::from_iter(Some(( - "app", - "test-reconcile-delete-server", - ))), - port: k8s::policy::server::Port::Name("http".to_string()), - proxy_protocol: Some(k8s::policy::server::ProxyProtocol::Http1), - }, - }; - create(&client, server).await + let server_name = "test-reconcile-delete-server"; + let server = k8s::policy::Server { + metadata: k8s::ObjectMeta { + namespace: Some(ns.to_string()), + name: Some(server_name.to_string()), + ..Default::default() + }, + spec: k8s::policy::ServerSpec { + pod_selector: k8s::labels::Selector::from_iter(Some(("app", server_name))), + port: k8s::policy::server::Port::Name("http".to_string()), + proxy_protocol: Some(k8s::policy::server::ProxyProtocol::Http1), + }, }; + create(&client, server).await; + // Create parentReference and route let srv_ref = vec![k8s::policy::httproute::ParentReference { group: Some("policy.linkerd.io".to_string()), kind: Some("Server".to_string()), namespace: Some(ns.clone()), - name: "test-reconcile-delete-server".to_string(), + name: server_name.to_string(), section_name: None, port: None, }]; @@ -268,13 +260,9 @@ async fn inbound_accepted_reconcile_parent_delete() { mk_route(&ns, "test-reconcile-delete-route", Some(srv_ref)), ) .await; - let cond = find_route_condition( - await_route_status(&client, &ns, "test-reconcile-delete-route") - .await - .parents, - &server.name_unchecked(), - ) - .expect("must have at least one 'Accepted' condition"); + let route_status = await_route_status(&client, &ns, "test-reconcile-delete-route").await; + let cond = find_route_condition(&route_status.parents, server_name) + .expect("must have at least one 'Accepted' condition"); assert_eq!(cond.status, "True"); assert_eq!(cond.reason, "Accepted"); @@ -290,29 +278,32 @@ async fn inbound_accepted_reconcile_parent_delete() { .await .expect("API delete request failed"); - // HTTPRoute may not be patched instantly, wrap with a timeout and loop - // until create_timestamp and observed_timestamp are different. - let cond = tokio::time::timeout(tokio::time::Duration::from_secs(60), async move { - loop { - let cond = find_route_condition( - await_route_status(&client, &ns, "test-reconcile-delete-route") - .await - .parents, - &server.name_unchecked(), - ) - .expect("must have at least one 'Accepted' condition"); - - // Observe condition's current timestamp. If it differs from the - // previously recorded timestamp, then it means the underlying - // condition has been updated so we can check the message and - // status. - if &cond.last_transition_time > create_timestamp { - return cond; - } - } - }) + // HTTPRoute may not be patched instantly, await the condition where + // create_timestamp and observed_timestamp are different. + let route_status = await_condition( + &client, + &ns, + "test-reconcile-delete-route", + |obj: Option<&k8s::policy::httproute::HttpRoute>| -> bool { + tracing::trace!(?obj, "got route status"); + let status = match obj.and_then(|route| route.status.as_ref()) { + Some(status) => status, + None => return false, + }; + let cond = match find_route_condition(&status.inner.parents, server_name) { + Some(cond) => cond, + None => return false, + }; + &cond.last_transition_time > create_timestamp + }, + ) .await - .expect("Timed-out waiting for HTTPRoute status update"); + .expect("must fetch route") + .status + .expect("route must contain a status representation"); + + let cond = find_route_condition(&route_status.inner.parents, server_name) + .expect("route must have a route condition"); assert_eq!(cond.status, "False"); assert_eq!(cond.reason, "NoMatchingParent"); }) diff --git a/policy-test/tests/outbound_http_route_status.rs b/policy-test/tests/outbound_http_route_status.rs index 68689502bbf92..7fa18c4822d3e 100644 --- a/policy-test/tests/outbound_http_route_status.rs +++ b/policy-test/tests/outbound_http_route_status.rs @@ -8,10 +8,11 @@ use linkerd_policy_test::{ async fn accepted_parent() { with_temp_ns(|client, ns| async move { // Create a parent Service + let svc_name = "test-service"; let svc = k8s::Service { metadata: k8s::ObjectMeta { namespace: Some(ns.clone()), - name: Some("test-service".to_string()), + name: Some(svc_name.to_string()), ..Default::default() }, spec: Some(k8s::ServiceSpec { @@ -42,7 +43,7 @@ async fn accepted_parent() { let route_status = statuses .clone() .into_iter() - .find(|route_status| route_status.parent_ref.name == svc.name_unchecked()) + .find(|route_status| route_status.parent_ref.name == svc_name) .expect("must have at least one parent status"); // Check status references to parent we have created @@ -50,7 +51,7 @@ async fn accepted_parent() { assert_eq!(route_status.parent_ref.kind.as_deref(), Some("Service")); // Check status is accepted with a status of 'True' - let cond = find_route_condition(statuses, &svc.name_unchecked()) + let cond = find_route_condition(&statuses, svc_name) .expect("must have at least one 'Accepted' condition for accepted servuce"); assert_eq!(cond.status, "True"); assert_eq!(cond.reason, "Accepted") @@ -92,11 +93,9 @@ async fn no_cluster_ip() { // Create a route that references the Service resource. let _route = create(&client, mk_route(&ns, "test-route", Some(svc_ref))).await; // Wait until route is updated with a status - let cond = find_route_condition( - await_route_status(&client, &ns, "test-route").await.parents, - "test-service", - ) - .expect("must have at least one 'Accepted' condition set for parent"); + let status = await_route_status(&client, &ns, "test-route").await; + let cond = find_route_condition(&status.parents, "test-service") + .expect("must have at least one 'Accepted' condition set for parent"); // Parent with no ClusterIP should not match. assert_eq!(cond.status, "False"); assert_eq!(cond.reason, "NoMatchingParent"); @@ -138,11 +137,9 @@ async fn external_name() { // Create a route that references the Service resource. let _route = create(&client, mk_route(&ns, "test-route", Some(svc_ref))).await; // Wait until route is updated with a status - let cond = find_route_condition( - await_route_status(&client, &ns, "test-route").await.parents, - "test-service", - ) - .expect("must have at least one 'Accepted' condition set for parent"); + let status = await_route_status(&client, &ns, "test-route").await; + let cond = find_route_condition(&status.parents, "test-service") + .expect("must have at least one 'Accepted' condition set for parent"); // Parent with ExternalName should not match. assert_eq!(cond.status, "False"); assert_eq!(cond.reason, "NoMatchingParent");