From 54f6d94bc09f8d10471338f93e5d86d341b1b062 Mon Sep 17 00:00:00 2001 From: Michael Wilkerson Date: Thu, 9 Mar 2023 21:39:38 -0800 Subject: [PATCH] added more retries for tests --- acceptance/framework/consul/cli_cluster.go | 2 +- acceptance/framework/consul/helm_cluster.go | 75 +++++++++++-------- acceptance/framework/helpers/helpers.go | 2 +- .../framework/portforward/port_forward.go | 2 +- 4 files changed, 45 insertions(+), 36 deletions(-) diff --git a/acceptance/framework/consul/cli_cluster.go b/acceptance/framework/consul/cli_cluster.go index 271f8f2eae..0a3e688951 100644 --- a/acceptance/framework/consul/cli_cluster.go +++ b/acceptance/framework/consul/cli_cluster.go @@ -252,7 +252,7 @@ func (c *CLICluster) SetupConsulClient(t *testing.T, secure bool) (*api.Client, c.logger) // Retry creating the port forward since it can fail occasionally. - retry.RunWith(&retry.Counter{Wait: 1 * time.Second, Count: 3}, t, func(r *retry.R) { + retry.RunWith(&retry.Counter{Wait: 3 * time.Second, Count: 60}, t, func(r *retry.R) { // NOTE: It's okay to pass in `t` to ForwardPortE despite being in a retry // because we're using ForwardPortE (not ForwardPort) so the `t` won't // get used to fail the test, just for logging. diff --git a/acceptance/framework/consul/helm_cluster.go b/acceptance/framework/consul/helm_cluster.go index eab1ba2904..3f4d31173d 100644 --- a/acceptance/framework/consul/helm_cluster.go +++ b/acceptance/framework/consul/helm_cluster.go @@ -142,8 +142,11 @@ func (h *HelmCluster) Destroy(t *testing.T) { h.helmOptions.ExtraArgs = map[string][]string{ "--wait": nil, } - err := helm.DeleteE(t, h.helmOptions, h.releaseName, false) - require.NoError(t, err) + + retry.RunWith(&retry.Counter{Wait: 1 * time.Second, Count: 15}, t, func(r *retry.R) { + err := helm.DeleteE(t, h.helmOptions, h.releaseName, false) + require.NoError(r, err) + }) // Retry because sometimes certain resources (like PVC) take time to delete // in cloud providers. @@ -324,22 +327,25 @@ func (h *HelmCluster) SetupConsulClient(t *testing.T, secure bool) (client *api. if h.ACLToken != "" { config.Token = h.ACLToken } else { - // Get the ACL token. First, attempt to read it from the bootstrap token (this will be true in primary Consul servers). - // If the bootstrap token doesn't exist, it means we are running against a secondary cluster - // and will try to read the replication token from the federation secret. - // In secondary servers, we don't create a bootstrap token since ACLs are only bootstrapped in the primary. - // Instead, we provide a replication token that serves the role of the bootstrap token. - aclSecret, err := h.kubernetesClient.CoreV1().Secrets(namespace).Get(context.Background(), h.releaseName+"-consul-bootstrap-acl-token", metav1.GetOptions{}) - if err != nil && errors.IsNotFound(err) { - federationSecret := fmt.Sprintf("%s-consul-federation", h.releaseName) - aclSecret, err = h.kubernetesClient.CoreV1().Secrets(namespace).Get(context.Background(), federationSecret, metav1.GetOptions{}) - require.NoError(t, err) - config.Token = string(aclSecret.Data["replicationToken"]) - } else if err == nil { - config.Token = string(aclSecret.Data["token"]) - } else { - require.NoError(t, err) - } + retry.RunWith(&retry.Counter{Wait: 1 * time.Second, Count: 600}, t, func(r *retry.R) { + // Get the ACL token. First, attempt to read it from the bootstrap token (this will be true in primary Consul servers). + // If the bootstrap token doesn't exist, it means we are running against a secondary cluster + // and will try to read the replication token from the federation secret. + // In secondary servers, we don't create a bootstrap token since ACLs are only bootstrapped in the primary. + // Instead, we provide a replication token that serves the role of the bootstrap token. + aclSecret, err := h.kubernetesClient.CoreV1().Secrets(namespace).Get(context.Background(), h.releaseName+"-consul-bootstrap-acl-token", metav1.GetOptions{}) + if err != nil && errors.IsNotFound(err) { + federationSecret := fmt.Sprintf("%s-consul-federation", h.releaseName) + aclSecret, err = h.kubernetesClient.CoreV1().Secrets(namespace).Get(context.Background(), federationSecret, metav1.GetOptions{}) + require.NoError(r, err) + config.Token = string(aclSecret.Data["replicationToken"]) + } else if err == nil { + config.Token = string(aclSecret.Data["token"]) + } else { + require.NoError(r, err) + } + }) + } } @@ -524,21 +530,24 @@ func defaultValues() map[string]string { } func CreateK8sSecret(t *testing.T, client kubernetes.Interface, cfg *config.TestConfig, namespace, secretName, secretKey, secret string) { - _, err := client.CoreV1().Secrets(namespace).Get(context.Background(), secretName, metav1.GetOptions{}) - if errors.IsNotFound(err) { - _, err := client.CoreV1().Secrets(namespace).Create(context.Background(), &corev1.Secret{ - ObjectMeta: metav1.ObjectMeta{ - Name: secretName, - }, - StringData: map[string]string{ - secretKey: secret, - }, - Type: corev1.SecretTypeOpaque, - }, metav1.CreateOptions{}) - require.NoError(t, err) - } else { - require.NoError(t, err) - } + + retry.RunWith(&retry.Counter{Wait: 1 * time.Second, Count: 15}, t, func(r *retry.R) { + _, err := client.CoreV1().Secrets(namespace).Get(context.Background(), secretName, metav1.GetOptions{}) + if errors.IsNotFound(err) { + _, err := client.CoreV1().Secrets(namespace).Create(context.Background(), &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: secretName, + }, + StringData: map[string]string{ + secretKey: secret, + }, + Type: corev1.SecretTypeOpaque, + }, metav1.CreateOptions{}) + require.NoError(r, err) + } else { + require.NoError(r, err) + } + }) helpers.Cleanup(t, cfg.NoCleanupOnFailure, func() { _ = client.CoreV1().Secrets(namespace).Delete(context.Background(), secretName, metav1.DeleteOptions{}) diff --git a/acceptance/framework/helpers/helpers.go b/acceptance/framework/helpers/helpers.go index 2c1a6ebf47..5f99a682ae 100644 --- a/acceptance/framework/helpers/helpers.go +++ b/acceptance/framework/helpers/helpers.go @@ -35,7 +35,7 @@ func CheckForPriorInstallations(t *testing.T, client kubernetes.Interface, optio // Check if there's an existing cluster and fail if there is one. // We may need to retry since this is the first command run once the Kube // cluster is created and sometimes the API server returns errors. - retry.RunWith(&retry.Counter{Wait: 1 * time.Second, Count: 3}, t, func(r *retry.R) { + retry.RunWith(&retry.Counter{Wait: 1 * time.Second, Count: 15}, t, func(r *retry.R) { var err error // NOTE: It's okay to pass in `t` to RunHelmCommandAndGetOutputE despite being in a retry // because we're using RunHelmCommandAndGetOutputE (not RunHelmCommandAndGetOutput) so the `t` won't diff --git a/acceptance/framework/portforward/port_forward.go b/acceptance/framework/portforward/port_forward.go index 97bf3f7856..30f4aed157 100644 --- a/acceptance/framework/portforward/port_forward.go +++ b/acceptance/framework/portforward/port_forward.go @@ -24,7 +24,7 @@ func CreateTunnelToResourcePort(t *testing.T, resourceName string, remotePort in logger) // Retry creating the port forward since it can fail occasionally. - retry.RunWith(&retry.Counter{Wait: 1 * time.Second, Count: 3}, t, func(r *retry.R) { + retry.RunWith(&retry.Counter{Wait: 3 * time.Second, Count: 60}, t, func(r *retry.R) { // NOTE: It's okay to pass in `t` to ForwardPortE despite being in a retry // because we're using ForwardPortE (not ForwardPort) so the `t` won't // get used to fail the test, just for logging.