diff --git a/tests/e2e/README.md b/tests/e2e/README.md index e4ee6a3b5..389351081 100644 --- a/tests/e2e/README.md +++ b/tests/e2e/README.md @@ -16,7 +16,7 @@ This end-to-end test suite utilizes Ginkgo, a testing framework known for its ex 1. [How to Run the test](#how-to-run-the-test) 1. [Running the test locally](#running-the-test-locally) 1. [Settings for end-to-end test execution](#settings-for-end-to-end-test-execution) - 1. [Customizing the test run](#customizing-the-test-run) + 1. [Customizing the test run](#customizing-the-test-run) 1. [Get test definitions for the end-to-end test](#get-test-definitions-for-the-end-to-end-test) 1. [Contributing](#contributing) @@ -69,7 +69,7 @@ var _ = Describe("Operator", Label("labels-for-the-test"), Ordered, func() { }) }) }) -``` +``` Note: The `Label` function is used to label the test. This is useful when you want to run a specific test or a group of tests. The label can be used to filter the tests when running them. Ordered is used to run the tests in the order they are defined. This is useful when you want to run the tests in a specific order. @@ -133,7 +133,7 @@ var _ = Describe("Operator", func() { }) }) }) -``` +``` * Add Labels to the tests. This is useful when you want to run a specific test or a group of tests. The label can be used to filter the tests when running them. For example: ```go var _ = Describe("Ambient configuration ", Label("smoke", "ambient"), Ordered, func() { @@ -192,6 +192,37 @@ var _ = Describe("Testing with cleanup", Ordered, func() { ``` * You can use multiple cleaners, each with its own state. This is useful if the test does some global set up, e.g. sets up the operator, and then specific tests create further resources which you want cleaned. * To clean resources without waiting, and waiting for them later, use `CleanupNoWait` followed by `WaitForDeletion`. This is particularly useful when working with more than one cluster. 
+* Use `debugcollector` to collect comprehensive debug information when tests fail. The debug collector captures the cluster state and saves it as artifacts for easier debugging. Like the cleaner, it records the initial state and collects debug information on test failure. For example: +```go +import "github.com/istio-ecosystem/sail-operator/tests/e2e/util/debugcollector" + +var _ = Describe("Testing with debug collection", Ordered, func() { + collector := debugcollector.New(cl, k, "test-suite-name") + + BeforeAll(func(ctx SpecContext) { + collector.Record(ctx) + // Any additional set up goes here + }) + + // Tests go here + + AfterAll(func(ctx SpecContext) { + if CurrentSpecReport().Failed() { + collector.CollectAndSave(ctx) + } + // Any cleanup logic goes here + }) +}) +``` + * The debug collector saves artifacts to the `$ARTIFACTS` directory (or the system temporary directory, e.g. `/tmp`, if not set) with a timestamped folder structure + * Collected information includes: pod YAMLs, logs, events, deployments, daemonsets, services, configmaps, and custom resources + * You can control the collection depth with the `DEBUG_COLLECTOR_DEPTH` environment variable (values: `full`, `minimal`, `logs-only`) + * For multicluster tests, create separate collectors for each cluster with descriptive names + * The artifacts directory structure is organized as: + - `debug-<context>-<timestamp>/` + - `cluster-scoped/` - cluster-wide resources + - `namespaces/<namespace>/` - namespace-specific resources, logs, and events + - `istioctl/` - istioctl proxy-status output ## Running the tests The end-to-end test can be run in two different environments: OCP (OpenShift Container Platform) and KinD (Kubernetes in Docker). @@ -405,7 +436,7 @@ make test.e2e.describe ``` When you run this target, the test definitions will be printed to the console with format `indent`. 
For example: - + ``` Name,Text,Start,End,Spec,Focused,Pending,Labels Describe,Operator,882,7688,false,false,false,"" diff --git a/tests/e2e/ambient/ambient_test.go b/tests/e2e/ambient/ambient_test.go index deb7c78a8..5b7d33887 100644 --- a/tests/e2e/ambient/ambient_test.go +++ b/tests/e2e/ambient/ambient_test.go @@ -27,6 +27,7 @@ import ( . "github.com/istio-ecosystem/sail-operator/pkg/test/util/ginkgo" "github.com/istio-ecosystem/sail-operator/tests/e2e/util/cleaner" "github.com/istio-ecosystem/sail-operator/tests/e2e/util/common" + "github.com/istio-ecosystem/sail-operator/tests/e2e/util/debugcollector" . "github.com/istio-ecosystem/sail-operator/tests/e2e/util/gomega" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" @@ -55,8 +56,10 @@ var _ = Describe("Ambient configuration ", Label("smoke", "ambient"), Ordered, f Context(fmt.Sprintf("Istio version %s", version.Version), func() { clr := cleaner.New(cl) + collector := debugcollector.New(cl, k, "ambient") BeforeAll(func(ctx SpecContext) { clr.Record(ctx) + collector.Record(ctx) Expect(k.CreateNamespace(controlPlaneNamespace)).To(Succeed(), "Istio namespace failed to be created") Expect(k.CreateNamespace(istioCniNamespace)).To(Succeed(), "IstioCNI namespace failed to be created") Expect(k.CreateNamespace(ztunnelNamespace)).To(Succeed(), "ZTunnel namespace failed to be created") @@ -296,8 +299,11 @@ spec: }) AfterAll(func(ctx SpecContext) { - if CurrentSpecReport().Failed() && keepOnFailure { - return + if CurrentSpecReport().Failed() { + collector.CollectAndSave(ctx) + if keepOnFailure { + return + } } clr.Cleanup(ctx) diff --git a/tests/e2e/controlplane/control_plane_test.go b/tests/e2e/controlplane/control_plane_test.go index 719c956aa..5177fcefe 100644 --- a/tests/e2e/controlplane/control_plane_test.go +++ b/tests/e2e/controlplane/control_plane_test.go @@ -28,6 +28,7 @@ import ( . 
"github.com/istio-ecosystem/sail-operator/pkg/test/util/ginkgo" "github.com/istio-ecosystem/sail-operator/tests/e2e/util/cleaner" "github.com/istio-ecosystem/sail-operator/tests/e2e/util/common" + "github.com/istio-ecosystem/sail-operator/tests/e2e/util/debugcollector" . "github.com/istio-ecosystem/sail-operator/tests/e2e/util/gomega" "github.com/istio-ecosystem/sail-operator/tests/e2e/util/istioctl" . "github.com/onsi/ginkgo/v2" @@ -100,8 +101,10 @@ metadata: for _, version := range istioversion.GetLatestPatchVersions() { Context(version.Name, func() { clr := cleaner.New(cl) + collector := debugcollector.New(cl, k, "control-plane") BeforeAll(func(ctx SpecContext) { clr.Record(ctx) + collector.Record(ctx) Expect(k.CreateNamespace(controlPlaneNamespace)).To(Succeed(), "Istio namespace failed to be created") Expect(k.CreateNamespace(istioCniNamespace)).To(Succeed(), "IstioCNI namespace failed to be created") }) @@ -239,8 +242,11 @@ metadata: }) AfterAll(func(ctx SpecContext) { - if CurrentSpecReport().Failed() && keepOnFailure { - return + if CurrentSpecReport().Failed() { + collector.CollectAndSave(ctx) + if keepOnFailure { + return + } } clr.Cleanup(ctx) diff --git a/tests/e2e/controlplane/control_plane_update_test.go b/tests/e2e/controlplane/control_plane_update_test.go index 7c3818c86..18826e570 100644 --- a/tests/e2e/controlplane/control_plane_update_test.go +++ b/tests/e2e/controlplane/control_plane_update_test.go @@ -28,6 +28,7 @@ import ( . "github.com/istio-ecosystem/sail-operator/pkg/test/util/ginkgo" "github.com/istio-ecosystem/sail-operator/tests/e2e/util/cleaner" "github.com/istio-ecosystem/sail-operator/tests/e2e/util/common" + "github.com/istio-ecosystem/sail-operator/tests/e2e/util/debugcollector" . "github.com/istio-ecosystem/sail-operator/tests/e2e/util/gomega" . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" @@ -50,6 +51,7 @@ var _ = Describe("Control Plane updates", Label("control-plane", "slow"), Ordere Context(istioversion.Base, func() { clr := cleaner.New(cl) + collector := debugcollector.New(cl, k, "control-plane-update") BeforeAll(func(ctx SpecContext) { if len(istioversion.List) < 2 { @@ -57,6 +59,7 @@ var _ = Describe("Control Plane updates", Label("control-plane", "slow"), Ordere } clr.Record(ctx) + collector.Record(ctx) Expect(k.CreateNamespace(controlPlaneNamespace)).To(Succeed(), "Istio namespace failed to be created") Expect(k.CreateNamespace(istioCniNamespace)).To(Succeed(), "IstioCNI namespace failed to be created") @@ -277,6 +280,7 @@ spec: AfterAll(func(ctx SpecContext) { if CurrentSpecReport().Failed() { + collector.CollectAndSave(ctx) common.LogDebugInfo(common.ControlPlane, k) debugInfoLogged = true if keepOnFailure { diff --git a/tests/e2e/dualstack/dualstack_test.go b/tests/e2e/dualstack/dualstack_test.go index 817b54d75..10e3fe0cf 100644 --- a/tests/e2e/dualstack/dualstack_test.go +++ b/tests/e2e/dualstack/dualstack_test.go @@ -27,6 +27,7 @@ import ( . "github.com/istio-ecosystem/sail-operator/pkg/test/util/ginkgo" "github.com/istio-ecosystem/sail-operator/tests/e2e/util/cleaner" "github.com/istio-ecosystem/sail-operator/tests/e2e/util/common" + "github.com/istio-ecosystem/sail-operator/tests/e2e/util/debugcollector" . "github.com/istio-ecosystem/sail-operator/tests/e2e/util/gomega" . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" @@ -59,8 +60,10 @@ var _ = Describe("DualStack configuration ", Label("dualstack"), Ordered, func() Context(fmt.Sprintf("Istio version %s", version.Version), func() { clr := cleaner.New(cl) + collector := debugcollector.New(cl, k, "dualstack") BeforeAll(func(ctx SpecContext) { clr.Record(ctx) + collector.Record(ctx) Expect(k.CreateNamespace(controlPlaneNamespace)).To(Succeed(), "Istio namespace failed to be created") Expect(k.CreateNamespace(istioCniNamespace)).To(Succeed(), "IstioCNI namespace failed to be created") }) @@ -229,8 +232,11 @@ values: }) AfterAll(func(ctx SpecContext) { - if CurrentSpecReport().Failed() && keepOnFailure { - return + if CurrentSpecReport().Failed() { + collector.CollectAndSave(ctx) + if keepOnFailure { + return + } } clr.Cleanup(ctx) diff --git a/tests/e2e/multicluster/multicluster_externalcontrolplane_test.go b/tests/e2e/multicluster/multicluster_externalcontrolplane_test.go index bbadb18b4..8909c4a55 100644 --- a/tests/e2e/multicluster/multicluster_externalcontrolplane_test.go +++ b/tests/e2e/multicluster/multicluster_externalcontrolplane_test.go @@ -27,6 +27,7 @@ import ( "github.com/istio-ecosystem/sail-operator/pkg/version" "github.com/istio-ecosystem/sail-operator/tests/e2e/util/cleaner" "github.com/istio-ecosystem/sail-operator/tests/e2e/util/common" + "github.com/istio-ecosystem/sail-operator/tests/e2e/util/debugcollector" . "github.com/istio-ecosystem/sail-operator/tests/e2e/util/gomega" "github.com/istio-ecosystem/sail-operator/tests/e2e/util/istioctl" . 
"github.com/onsi/ginkgo/v2" @@ -59,10 +60,14 @@ var _ = Describe("Multicluster deployment models", Label("multicluster", "multic Context(fmt.Sprintf("Istio version %s", v.Version), func() { clr1 := cleaner.New(clPrimary, "cluster=primary") clr2 := cleaner.New(clRemote, "cluster=remote") + collector1 := debugcollector.New(clPrimary, k1, "multicluster-primary") + collector2 := debugcollector.New(clRemote, k2, "multicluster-remote") BeforeAll(func(ctx SpecContext) { clr1.Record(ctx) clr2.Record(ctx) + collector1.Record(ctx) + collector2.Record(ctx) }) When("default Istio is created in Cluster #1 to handle ingress to External Control Plane", func() { @@ -412,6 +417,8 @@ spec: AfterAll(func(ctx SpecContext) { if CurrentSpecReport().Failed() { + collector1.CollectAndSave(ctx) + collector2.CollectAndSave(ctx) common.LogDebugInfo(common.MultiCluster, k1, k2) debugInfoLogged = true if keepOnFailure { diff --git a/tests/e2e/multicluster/multicluster_multiprimary_test.go b/tests/e2e/multicluster/multicluster_multiprimary_test.go index 2994fc3ab..29d37b0b9 100644 --- a/tests/e2e/multicluster/multicluster_multiprimary_test.go +++ b/tests/e2e/multicluster/multicluster_multiprimary_test.go @@ -28,6 +28,7 @@ import ( "github.com/istio-ecosystem/sail-operator/tests/e2e/util/certs" "github.com/istio-ecosystem/sail-operator/tests/e2e/util/cleaner" "github.com/istio-ecosystem/sail-operator/tests/e2e/util/common" + "github.com/istio-ecosystem/sail-operator/tests/e2e/util/debugcollector" . "github.com/istio-ecosystem/sail-operator/tests/e2e/util/gomega" "github.com/istio-ecosystem/sail-operator/tests/e2e/util/istioctl" . 
"github.com/onsi/ginkgo/v2" @@ -48,10 +49,14 @@ var _ = Describe("Multicluster deployment models", Label("multicluster", "multic Context(fmt.Sprintf("Istio version %s", version.Version), func() { clr1 := cleaner.New(clPrimary, "cluster=primary") clr2 := cleaner.New(clRemote, "cluster=remote") + collector1 := debugcollector.New(clPrimary, k1, "multicluster-primary") + collector2 := debugcollector.New(clRemote, k2, "multicluster-remote") BeforeAll(func(ctx SpecContext) { clr1.Record(ctx) clr2.Record(ctx) + collector1.Record(ctx) + collector2.Record(ctx) }) When("Istio and IstioCNI resources are created in both clusters", func() { @@ -291,6 +296,8 @@ values: AfterAll(func(ctx SpecContext) { if CurrentSpecReport().Failed() { + collector1.CollectAndSave(ctx) + collector2.CollectAndSave(ctx) common.LogDebugInfo(common.MultiCluster, k1, k2) debugInfoLogged = true if keepOnFailure { diff --git a/tests/e2e/multicluster/multicluster_primaryremote_test.go b/tests/e2e/multicluster/multicluster_primaryremote_test.go index f0f15a978..0e6320035 100644 --- a/tests/e2e/multicluster/multicluster_primaryremote_test.go +++ b/tests/e2e/multicluster/multicluster_primaryremote_test.go @@ -29,6 +29,7 @@ import ( "github.com/istio-ecosystem/sail-operator/tests/e2e/util/certs" "github.com/istio-ecosystem/sail-operator/tests/e2e/util/cleaner" "github.com/istio-ecosystem/sail-operator/tests/e2e/util/common" + "github.com/istio-ecosystem/sail-operator/tests/e2e/util/debugcollector" . "github.com/istio-ecosystem/sail-operator/tests/e2e/util/gomega" "github.com/istio-ecosystem/sail-operator/tests/e2e/util/istioctl" . 
"github.com/onsi/ginkgo/v2" @@ -55,10 +56,14 @@ var _ = Describe("Multicluster deployment models", Label("multicluster", "multic Context(fmt.Sprintf("Istio version %s", v.Version), func() { clr1 := cleaner.New(clPrimary, "cluster=primary") clr2 := cleaner.New(clRemote, "cluster=remote") + collector1 := debugcollector.New(clPrimary, k1, "multicluster-primary") + collector2 := debugcollector.New(clRemote, k2, "multicluster-remote") BeforeAll(func(ctx SpecContext) { clr1.Record(ctx) clr2.Record(ctx) + collector1.Record(ctx) + collector2.Record(ctx) }) When("Istio and IstioCNI resources are created in both clusters", func() { @@ -308,6 +313,8 @@ values: AfterAll(func(ctx SpecContext) { if CurrentSpecReport().Failed() { + collector1.CollectAndSave(ctx) + collector2.CollectAndSave(ctx) common.LogDebugInfo(common.MultiCluster, k1, k2) debugInfoLogged = true if keepOnFailure { diff --git a/tests/e2e/multicontrolplane/multi_control_plane_test.go b/tests/e2e/multicontrolplane/multi_control_plane_test.go index 18fb8d94c..ace56fab9 100644 --- a/tests/e2e/multicontrolplane/multi_control_plane_test.go +++ b/tests/e2e/multicontrolplane/multi_control_plane_test.go @@ -26,6 +26,7 @@ import ( . "github.com/istio-ecosystem/sail-operator/pkg/test/util/ginkgo" "github.com/istio-ecosystem/sail-operator/tests/e2e/util/cleaner" "github.com/istio-ecosystem/sail-operator/tests/e2e/util/common" + "github.com/istio-ecosystem/sail-operator/tests/e2e/util/debugcollector" . "github.com/istio-ecosystem/sail-operator/tests/e2e/util/gomega" . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" @@ -43,9 +44,11 @@ var _ = Describe("Multi control plane deployment model", Label("smoke", "multico for _, version := range istioversion.GetLatestPatchVersions() { Context(fmt.Sprintf("Istio version %s", version.Version), func() { clr := cleaner.New(cl) + collector := debugcollector.New(cl, k, "multicontrol-plane") BeforeAll(func(ctx SpecContext) { clr.Record(ctx) + collector.Record(ctx) }) Describe("Installation", func() { @@ -161,6 +164,7 @@ spec: AfterAll(func(ctx SpecContext) { if CurrentSpecReport().Failed() { + collector.CollectAndSave(ctx) common.LogDebugInfo(common.ControlPlane, k) debugInfoLogged = true } diff --git a/tests/e2e/operator/operator_install_test.go b/tests/e2e/operator/operator_install_test.go index 9899437c2..38abdc877 100644 --- a/tests/e2e/operator/operator_install_test.go +++ b/tests/e2e/operator/operator_install_test.go @@ -28,6 +28,7 @@ import ( . "github.com/istio-ecosystem/sail-operator/pkg/test/util/ginkgo" "github.com/istio-ecosystem/sail-operator/tests/e2e/util/cleaner" "github.com/istio-ecosystem/sail-operator/tests/e2e/util/common" + "github.com/istio-ecosystem/sail-operator/tests/e2e/util/debugcollector" . "github.com/istio-ecosystem/sail-operator/tests/e2e/util/gomega" . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" @@ -62,8 +63,10 @@ var _ = Describe("Operator", Label("smoke", "operator"), Ordered, func() { Describe("installation", func() { clr := cleaner.New(cl) + collector := debugcollector.New(cl, k, "operator") BeforeAll(func(ctx SpecContext) { clr.Record(ctx) + collector.Record(ctx) }) It("deploys all the CRDs", func(ctx SpecContext) { @@ -181,12 +184,12 @@ spec: }) AfterAll(func(ctx SpecContext) { - if CurrentSpecReport().Failed() && keepOnFailure { - return - } - if CurrentSpecReport().Failed() { + collector.CollectAndSave(ctx) common.LogDebugInfo(common.Operator, k) + if keepOnFailure { + return + } } clr.Cleanup(ctx) }) diff --git a/tests/e2e/util/common/e2e_utils.go b/tests/e2e/util/common/e2e_utils.go index ff5e289d1..cd63b0b4c 100644 --- a/tests/e2e/util/common/e2e_utils.go +++ b/tests/e2e/util/common/e2e_utils.go @@ -27,7 +27,6 @@ import ( "github.com/Masterminds/semver/v3" "github.com/istio-ecosystem/sail-operator/pkg/env" . "github.com/istio-ecosystem/sail-operator/pkg/test/util/ginkgo" - "github.com/istio-ecosystem/sail-operator/tests/e2e/util/istioctl" "github.com/istio-ecosystem/sail-operator/tests/e2e/util/kubectl" . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" @@ -35,8 +34,6 @@ import ( appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" "sigs.k8s.io/controller-runtime/pkg/client" - - "istio.io/istio/pkg/ptr" ) type testSuite string @@ -59,12 +56,10 @@ var ( OperatorImage = env.Get("IMAGE", "quay.io/sail-dev/sail-operator:latest") OperatorNamespace = env.Get("NAMESPACE", "sail-operator") - deploymentName = env.Get("DEPLOYMENT_NAME", "sail-operator") controlPlaneNamespace = env.Get("CONTROL_PLANE_NS", "istio-system") istioName = env.Get("ISTIO_NAME", "default") istioCniName = env.Get("ISTIOCNI_NAME", "default") istioCniNamespace = env.Get("ISTIOCNI_NAMESPACE", "istio-cni") - ztunnelNamespace = env.Get("ZTUNNEL_NAMESPACE", "ztunnel") // version can have one of the following formats: // - 1.22.2 @@ -149,124 +144,47 @@ func CheckNamespaceEmpty(ctx SpecContext, cl client.Client, ns string) { } func LogDebugInfo(suite testSuite, kubectls ...kubectl.Kubectl) { - // General debugging information to help diagnose the failure - // TODO: Add the creation of file with this information to be attached to the test report + // Log a summary of debug information. + // Detailed debug artifacts are saved to the $ARTIFACTS directory by the DebugCollector. 
+ + artifactsDir := env.Get("ARTIFACTS", "") GinkgoWriter.Println() - GinkgoWriter.Println("The test run has failures and the debug information is as follows:") + GinkgoWriter.Println("=========================================================") + GinkgoWriter.Println("TEST FAILURE DETECTED") + GinkgoWriter.Println("=========================================================") + if artifactsDir != "" { + GinkgoWriter.Printf("Detailed debug artifacts have been saved to: %s\n", artifactsDir) + GinkgoWriter.Println("Check the debug-* directories for comprehensive cluster state information.") + } GinkgoWriter.Println() + GinkgoWriter.Println("Quick Summary:") + GinkgoWriter.Println("=========================================================") + for _, k := range kubectls { if k.ClusterName != "" { - GinkgoWriter.Println("=========================================================") GinkgoWriter.Println("CLUSTER:", k.ClusterName) - GinkgoWriter.Println("=========================================================") - } - logOperatorDebugInfo(k) - GinkgoWriter.Println("=========================================================") - logIstioDebugInfo(k) - GinkgoWriter.Println("=========================================================") - logCNIDebugInfo(k) - GinkgoWriter.Println("=========================================================") - logCertsDebugInfo(k) - GinkgoWriter.Println("=========================================================") - GinkgoWriter.Println() - - if suite == Ambient { - logZtunnelDebugInfo(k) - describe, err := k.WithNamespace(SleepNamespace).Describe("deployment", "sleep") - logDebugElement("=====sleep deployment describe=====", describe, err) - describe, err = k.WithNamespace(HttpbinNamespace).Describe("deployment", "httpbin") - logDebugElement("=====httpbin deployment describe=====", describe, err) } - } -} - -func logOperatorDebugInfo(k kubectl.Kubectl) { - k = k.WithNamespace(OperatorNamespace) - operator, err := k.GetYAML("deployment", deploymentName) 
- logDebugElement("=====Operator Deployment YAML=====", operator, err) - - logs, err := k.Logs("deploy/"+deploymentName, ptr.Of(120*time.Second)) - logDebugElement("=====Operator logs=====", logs, err) - - events, err := k.GetEvents() - logDebugElement("=====Events in "+OperatorNamespace+"=====", events, err) - - // Temporary information to gather more details about failure - pods, err := k.GetPods("", "-o wide") - logDebugElement("=====Pods in "+OperatorNamespace+"=====", pods, err) - - describe, err := k.Describe("deployment", deploymentName) - logDebugElement("=====Operator Deployment describe=====", describe, err) -} - -func logIstioDebugInfo(k kubectl.Kubectl) { - resource, err := k.GetYAML("istio", istioName) - logDebugElement("=====Istio YAML=====", resource, err) - - output, err := k.WithNamespace(controlPlaneNamespace).GetPods("", "-o wide") - logDebugElement("=====Pods in "+controlPlaneNamespace+"=====", output, err) - - logs, err := k.WithNamespace(controlPlaneNamespace).Logs("deploy/istiod", ptr.Of(120*time.Second)) - logDebugElement("=====Istiod logs=====", logs, err) - - events, err := k.WithNamespace(controlPlaneNamespace).GetEvents() - logDebugElement("=====Events in "+controlPlaneNamespace+"=====", events, err) - - // Running istioctl proxy-status to get the status of the proxies. 
- proxyStatus, err := istioctl.GetProxyStatus() - logDebugElement("=====Istioctl Proxy Status=====", proxyStatus, err) -} - -func logCNIDebugInfo(k kubectl.Kubectl) { - resource, err := k.GetYAML("istiocni", istioCniName) - logDebugElement("=====IstioCNI YAML=====", resource, err) - - ds, err := k.WithNamespace(istioCniNamespace).GetYAML("daemonset", "istio-cni-node") - logDebugElement("=====Istio CNI DaemonSet YAML=====", ds, err) - - events, err := k.WithNamespace(istioCniNamespace).GetEvents() - logDebugElement("=====Events in "+istioCniNamespace+"=====", events, err) - - // Temporary information to gather more details about failure - pods, err := k.WithNamespace(istioCniNamespace).GetPods("", "-o wide") - logDebugElement("=====Pods in "+istioCniNamespace+"=====", pods, err) - - describe, err := k.WithNamespace(istioCniNamespace).Describe("daemonset", "istio-cni-node") - logDebugElement("=====Istio CNI DaemonSet describe=====", describe, err) - - logs, err := k.WithNamespace(istioCniNamespace).Logs("daemonset/istio-cni-node", ptr.Of(120*time.Second)) - logDebugElement("=====Istio CNI logs=====", logs, err) -} - -func logZtunnelDebugInfo(k kubectl.Kubectl) { - resource, err := k.GetYAML("ztunnel", "default") - logDebugElement("=====ZTunnel YAML=====", resource, err) - ds, err := k.WithNamespace(ztunnelNamespace).GetYAML("daemonset", "ztunnel") - logDebugElement("=====ZTunnel DaemonSet YAML=====", ds, err) - - events, err := k.WithNamespace(ztunnelNamespace).GetEvents() - logDebugElement("=====Events in "+ztunnelNamespace+"=====", events, err) - - describe, err := k.WithNamespace(ztunnelNamespace).Describe("daemonset", "ztunnel") - logDebugElement("=====ZTunnel DaemonSet describe=====", describe, err) - - logs, err := k.WithNamespace(ztunnelNamespace).Logs("daemonset/ztunnel", ptr.Of(120*time.Second)) - logDebugElement("=====ztunnel logs=====", logs, err) + // Log quick status checks + logQuickStatus(k) + GinkgoWriter.Println("---") + } + 
GinkgoWriter.Println("=========================================================") + if artifactsDir != "" { + GinkgoWriter.Printf("\nFor full details, review artifacts in: %s\n", artifactsDir) + } + GinkgoWriter.Println() } -func logCertsDebugInfo(k kubectl.Kubectl) { - certs, err := k.WithNamespace(controlPlaneNamespace).GetSecret("cacerts") - logDebugElement("=====CA certs in "+controlPlaneNamespace+"=====", certs, err) -} +func logQuickStatus(k kubectl.Kubectl) { + // Quick status checks - just high-level information + if pods, err := k.WithNamespace(OperatorNamespace).GetPods("", ""); err == nil { + GinkgoWriter.Printf("Operator namespace pods:\n%s\n", pods) + } -func logDebugElement(caption string, info string, err error) { - GinkgoWriter.Println("\n" + caption + ":") - if err != nil { - GinkgoWriter.Println(Indent(err.Error())) - } else { - GinkgoWriter.Println(Indent(strings.TrimSpace(info))) + if output, err := k.GetYAML("istio", istioName); err == nil && output != "" { + GinkgoWriter.Println("Istio CR exists") } } diff --git a/tests/e2e/util/debugcollector/debugcollector.go b/tests/e2e/util/debugcollector/debugcollector.go new file mode 100644 index 000000000..b0089764e --- /dev/null +++ b/tests/e2e/util/debugcollector/debugcollector.go @@ -0,0 +1,394 @@ +//go:build e2e + +// Copyright Istio Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package debugcollector + +import ( + "context" + "fmt" + "os" + "path/filepath" + "strings" + "time" + + "github.com/istio-ecosystem/sail-operator/pkg/env" + . "github.com/istio-ecosystem/sail-operator/pkg/test/util/ginkgo" + "github.com/istio-ecosystem/sail-operator/tests/e2e/util/istioctl" + "github.com/istio-ecosystem/sail-operator/tests/e2e/util/kubectl" + . "github.com/onsi/ginkgo/v2" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime/schema" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// DebugCollector records namespaces created during tests and collects comprehensive debug information on test failure. +// It saves all debug information to the artifacts directory for easier debugging. +type DebugCollector struct { + cl client.Client + kubectl kubectl.Kubectl + ctx []string + recorded bool + artifactsDir string + recordedNS map[string]struct{} + clusterScoped bool + collectionDepth string +} + +// New returns a DebugCollector which can record namespaces and collect debug information on test failure. +// It needs an initialized client and kubectl instance, and has optional (string) context which can be used to distinguish its output. +func New(cl client.Client, k kubectl.Kubectl, ctx ...string) DebugCollector { + artifactsDir := env.Get("ARTIFACTS", os.TempDir()) + collectionDepth := env.Get("DEBUG_COLLECTOR_DEPTH", "full") // Options: full, minimal, logs-only + + return DebugCollector{ + cl: cl, + kubectl: k, + ctx: ctx, + recordedNS: make(map[string]struct{}), + artifactsDir: artifactsDir, + clusterScoped: true, + collectionDepth: collectionDepth, + } +} + +// Record will save the state of all namespaces that exist so they won't be included in debug collection. +// This allows the collector to focus on namespaces created during the test. 
+func (d *DebugCollector) Record(ctx context.Context) { + d.recorded = true + + // Record all existing namespaces so we can focus on test-created ones during collection + namespaceList := &corev1.NamespaceList{} + if err := d.cl.List(ctx, namespaceList); err != nil { + GinkgoWriter.Printf("Warning: Failed to list namespaces during Record: %v\n", err) + return + } + + for _, ns := range namespaceList.Items { + d.recordedNS[ns.Name] = struct{}{} + } +} + +// CollectAndSave collects comprehensive debug information and saves it to the artifacts directory. +// It creates a timestamped directory structure for organized artifact storage. +func (d *DebugCollector) CollectAndSave(ctx context.Context) string { + if !d.recorded { + GinkgoWriter.Println("Warning: DebugCollector.Record() was not called. Collecting all namespaces.") + } + + timestamp := time.Now().Format("20060102-150405") + contextStr := strings.Join(d.ctx, "-") + if contextStr != "" { + contextStr = "-" + contextStr + } + + debugDir := filepath.Join(d.artifactsDir, fmt.Sprintf("debug%s-%s", contextStr, timestamp)) + + if err := os.MkdirAll(debugDir, 0o755); err != nil { + GinkgoWriter.Printf("Error creating debug directory %s: %v\n", debugDir, err) + return debugDir + } + + By(fmt.Sprintf("Collecting debug information to %s", debugDir)) + + // Collect cluster-scoped resources + if d.clusterScoped { + d.collectClusterScopedResources(ctx, debugDir) + } + + // Collect namespace-scoped resources + d.collectNamespaceResources(ctx, debugDir) + + // Collect istioctl proxy-status + d.collectIstioctlInfo(debugDir) + + Success(fmt.Sprintf("Debug information saved to: %s", debugDir)) + + return debugDir +} + +// collectClusterScopedResources collects cluster-wide resources for debugging. 
+func (d *DebugCollector) collectClusterScopedResources(ctx context.Context, debugDir string) { + clusterDir := filepath.Join(debugDir, "cluster-scoped") + if err := os.MkdirAll(clusterDir, 0o755); err != nil { + GinkgoWriter.Printf("Error creating cluster-scoped directory: %v\n", err) + return + } + + // Collect Istio CRs + d.collectCustomResources(ctx, clusterDir, "sailoperator.io", "v1", "Istio") + d.collectCustomResources(ctx, clusterDir, "sailoperator.io", "v1", "IstioCNI") + d.collectCustomResources(ctx, clusterDir, "sailoperator.io", "v1alpha1", "ZTunnel") + d.collectCustomResources(ctx, clusterDir, "sailoperator.io", "v1", "IstioRevision") + d.collectCustomResources(ctx, clusterDir, "sailoperator.io", "v1", "IstioRevisionTag") + + // Collect nodes information - use GetYAML for cluster-scoped resources + // For nodes, we'll use kubectl directly with empty namespace + k := d.kubectl.WithNamespace("") + if output, err := k.GetYAML("nodes", ""); err == nil { + d.writeToFile(filepath.Join(clusterDir, "nodes.yaml"), output) + } +} + +// collectCustomResources collects custom resources of a specific GVK. +func (d *DebugCollector) collectCustomResources(ctx context.Context, dir, _ /* group */, version, kind string) { + gvk := schema.GroupVersionKind{ + Group: "sailoperator.io", + Version: version, + Kind: kind + "List", + } + + list := &unstructured.UnstructuredList{} + list.SetGroupVersionKind(gvk) + + if err := d.cl.List(ctx, list); err != nil { + GinkgoWriter.Printf("Warning: Failed to list %s: %v\n", kind, err) + return + } + + for _, item := range list.Items { + filename := fmt.Sprintf("%s-%s.yaml", strings.ToLower(kind), item.GetName()) + if output, err := d.kubectl.GetYAML(strings.ToLower(kind), item.GetName()); err == nil { + d.writeToFile(filepath.Join(dir, filename), output) + } + } +} + +// collectNamespaceResources collects resources from namespaces created during the test. 
+func (d *DebugCollector) collectNamespaceResources(ctx context.Context, debugDir string) { + namespaceList := &corev1.NamespaceList{} + if err := d.cl.List(ctx, namespaceList); err != nil { + GinkgoWriter.Printf("Error listing namespaces: %v\n", err) + return + } + + namespacesDir := filepath.Join(debugDir, "namespaces") + if err := os.MkdirAll(namespacesDir, 0o755); err != nil { + GinkgoWriter.Printf("Error creating namespaces directory: %v\n", err) + return + } + + for _, ns := range namespaceList.Items { + // Skip namespaces that existed before the test + if _, recorded := d.recordedNS[ns.Name]; recorded { + continue + } + + // Skip system namespaces that might have been auto-created + if d.isSystemNamespace(ns.Name) { + continue + } + + d.collectNamespaceDebugInfo(ctx, namespacesDir, ns.Name) + } +} + +// isSystemNamespace checks if a namespace should be skipped from collection. +func (d *DebugCollector) isSystemNamespace(ns string) bool { + systemNamespaces := []string{ + "kube-system", + "kube-public", + "kube-node-lease", + "default", + "local-path-storage", + } + + for _, sysNS := range systemNamespaces { + if ns == sysNS { + return true + } + } + + return false +} + +// collectNamespaceDebugInfo collects all debug information for a specific namespace. 
+func (d *DebugCollector) collectNamespaceDebugInfo(ctx context.Context, namespacesDir, ns string) { + nsDir := filepath.Join(namespacesDir, ns) + if err := os.MkdirAll(nsDir, 0o755); err != nil { + GinkgoWriter.Printf("Error creating namespace directory %s: %v\n", ns, err) + return + } + + // Create subdirectories + resourcesDir := filepath.Join(nsDir, "resources") + logsDir := filepath.Join(nsDir, "logs") + + if err := os.MkdirAll(resourcesDir, 0o755); err != nil { + GinkgoWriter.Printf("Error creating resources directory for %s: %v\n", ns, err) + return + } + + if d.collectionDepth != "minimal" { + if err := os.MkdirAll(logsDir, 0o755); err != nil { + GinkgoWriter.Printf("Error creating logs directory for %s: %v\n", ns, err) + return + } + } + + k := d.kubectl.WithNamespace(ns) + + // Collect resources (always collected regardless of depth) + d.collectResourcesInNamespace(ctx, ns, resourcesDir, k) + + // Collect events + if events, err := k.GetEvents(); err == nil { + d.writeToFile(filepath.Join(nsDir, "events.yaml"), events) + } + + // Collect pod logs (skipped in minimal mode) + if d.collectionDepth != "minimal" { + d.collectPodLogs(ctx, ns, logsDir) + } +} + +// collectResourcesInNamespace collects various Kubernetes resources in a specific namespace. 
+func (d *DebugCollector) collectResourcesInNamespace(ctx context.Context, ns, resourcesDir string, k kubectl.Kubectl) { + // Collect Deployments + deploymentList := &appsv1.DeploymentList{} + if err := d.cl.List(ctx, deploymentList, client.InNamespace(ns)); err == nil { + for _, deploy := range deploymentList.Items { + if output, err := k.GetYAML("deployment", deploy.Name); err == nil { + d.writeToFile(filepath.Join(resourcesDir, fmt.Sprintf("deployment-%s.yaml", deploy.Name)), output) + } + // Also collect describe output for more details + if describe, err := k.Describe("deployment", deploy.Name); err == nil { + d.writeToFile(filepath.Join(resourcesDir, fmt.Sprintf("deployment-%s.describe.txt", deploy.Name)), describe) + } + } + } + + // Collect DaemonSets + daemonsetList := &appsv1.DaemonSetList{} + if err := d.cl.List(ctx, daemonsetList, client.InNamespace(ns)); err == nil { + for _, ds := range daemonsetList.Items { + if output, err := k.GetYAML("daemonset", ds.Name); err == nil { + d.writeToFile(filepath.Join(resourcesDir, fmt.Sprintf("daemonset-%s.yaml", ds.Name)), output) + } + if describe, err := k.Describe("daemonset", ds.Name); err == nil { + d.writeToFile(filepath.Join(resourcesDir, fmt.Sprintf("daemonset-%s.describe.txt", ds.Name)), describe) + } + } + } + + // Collect Services + serviceList := &corev1.ServiceList{} + if err := d.cl.List(ctx, serviceList, client.InNamespace(ns)); err == nil { + for _, svc := range serviceList.Items { + if output, err := k.GetYAML("service", svc.Name); err == nil { + d.writeToFile(filepath.Join(resourcesDir, fmt.Sprintf("service-%s.yaml", svc.Name)), output) + } + } + } + + // Collect Pods (list view) + if pods, err := k.GetPods("", "-o wide"); err == nil { + d.writeToFile(filepath.Join(resourcesDir, "pods-list.txt"), pods) + } + + // Collect individual pod YAMLs + podList := &corev1.PodList{} + if err := d.cl.List(ctx, podList, client.InNamespace(ns)); err == nil { + for _, pod := range podList.Items { + if output, 
err := k.GetYAML("pod", pod.Name); err == nil { + d.writeToFile(filepath.Join(resourcesDir, fmt.Sprintf("pod-%s.yaml", pod.Name)), output) + } + } + } + + // Collect ConfigMaps + cmList := &corev1.ConfigMapList{} + if err := d.cl.List(ctx, cmList, client.InNamespace(ns)); err == nil { + for _, cm := range cmList.Items { + if output, err := k.GetYAML("configmap", cm.Name); err == nil { + d.writeToFile(filepath.Join(resourcesDir, fmt.Sprintf("configmap-%s.yaml", cm.Name)), output) + } + } + } + + // Collect Secrets list (we get the list but individual secrets may contain sensitive data) + // Using GetYAML on the resource type will list all secrets + secretsList := &corev1.SecretList{} + if err := d.cl.List(ctx, secretsList, client.InNamespace(ns)); err == nil { + // Just save the names and types, not the actual secret data + var secretsInfo strings.Builder + secretsInfo.WriteString(fmt.Sprintf("Secrets in namespace %s:\n", ns)) + for _, secret := range secretsList.Items { + secretsInfo.WriteString(fmt.Sprintf(" - Name: %s, Type: %s\n", secret.Name, secret.Type)) + } + d.writeToFile(filepath.Join(resourcesDir, "secrets-list.txt"), secretsInfo.String()) + } +} + +// collectPodLogs collects logs from all pods in a namespace. 
+func (d *DebugCollector) collectPodLogs(ctx context.Context, ns, logsDir string) { + podList := &corev1.PodList{} + if err := d.cl.List(ctx, podList, client.InNamespace(ns)); err != nil { + GinkgoWriter.Printf("Error listing pods in namespace %s: %v\n", ns, err) + return + } + + k := d.kubectl.WithNamespace(ns) + logsSince := 120 * time.Second + + for _, pod := range podList.Items { + // Collect logs from all containers in the pod + for _, container := range pod.Spec.Containers { + logFile := fmt.Sprintf("%s-%s.log", pod.Name, container.Name) + if logs, err := k.Logs(fmt.Sprintf("pod/%s -c %s", pod.Name, container.Name), &logsSince); err == nil { + d.writeToFile(filepath.Join(logsDir, logFile), logs) + } else { + d.writeToFile(filepath.Join(logsDir, logFile), fmt.Sprintf("Error collecting logs: %v", err)) + } + } + + // Collect logs from init containers if any + for _, container := range pod.Spec.InitContainers { + logFile := fmt.Sprintf("%s-%s-init.log", pod.Name, container.Name) + if logs, err := k.Logs(fmt.Sprintf("pod/%s -c %s", pod.Name, container.Name), &logsSince); err == nil { + d.writeToFile(filepath.Join(logsDir, logFile), logs) + } + } + } +} + +// collectIstioctlInfo collects istioctl debug information. +func (d *DebugCollector) collectIstioctlInfo(debugDir string) { + if d.collectionDepth == "minimal" { + return + } + + istioctlDir := filepath.Join(debugDir, "istioctl") + if err := os.MkdirAll(istioctlDir, 0o755); err != nil { + GinkgoWriter.Printf("Error creating istioctl directory: %v\n", err) + return + } + + // Collect proxy-status + if proxyStatus, err := istioctl.GetProxyStatus(); err == nil { + d.writeToFile(filepath.Join(istioctlDir, "proxy-status.txt"), proxyStatus) + } else { + d.writeToFile(filepath.Join(istioctlDir, "proxy-status.txt"), fmt.Sprintf("Error: %v", err)) + } +} + +// writeToFile writes content to a file, creating parent directories if needed. 
+func (d *DebugCollector) writeToFile(filepath, content string) { + if err := os.WriteFile(filepath, []byte(content), 0o644); err != nil { + GinkgoWriter.Printf("Error writing to file %s: %v\n", filepath, err) + } +}