diff --git a/internal/cmd/egctl/collect.go b/internal/cmd/egctl/collect.go
index e95db983eb..d412ed6ae7 100644
--- a/internal/cmd/egctl/collect.go
+++ b/internal/cmd/egctl/collect.go
@@ -16,6 +16,7 @@ import (
 	"time"
 
 	"github.com/spf13/cobra"
+	"k8s.io/apimachinery/pkg/util/sets"
 	cmdutil "k8s.io/kubectl/pkg/cmd/util"
 
 	"github.com/envoyproxy/gateway/internal/cmd/options"
@@ -26,6 +27,7 @@ import (
 type collectOptions struct {
 	outPath               string
 	envoyGatewayNamespace string
+	proxyNamespaces       []string
 }
 
 func newCollectCommand() *cobra.Command {
@@ -48,6 +50,8 @@ func newCollectCommand() *cobra.Command {
 		"Specify the output file path for collected data. If not specified, a timestamped file will be created in the current directory.")
 	collectCommand.PersistentFlags().StringVarP(&collectOpts.envoyGatewayNamespace, "envoy-system-namespace", "", "envoy-gateway-system",
 		"Specify the namespace where the Envoy Gateway controller is installed.")
+	collectCommand.PersistentFlags().StringArrayVarP(&collectOpts.proxyNamespaces, "envoy-proxy-namespaces", "", []string{},
+		"Specify the namespaces where Envoy proxies are running.")
 
 	return collectCommand
 }
@@ -88,8 +92,19 @@ func runCollect(collectOpts collectOptions) error {
 		return fmt.Errorf("create bundle dir: %w", err)
 	}
 
-	result := tb.CollectResult(ctx, restConfig, bundlePath, collectOpts.envoyGatewayNamespace)
-	return result.ArchiveSupportBundle(bundlePath, fmt.Sprintf("%s.tar.gz", basename))
+	// The system namespace is always collected first, so drop it from the proxy
+	// set to avoid collecting the same namespace twice.
+	proxyNamespaces := sets.New(collectOpts.proxyNamespaces...).Delete(collectOpts.envoyGatewayNamespace)
+	opts := tb.CollectOptions{
+		BundlePath:          bundlePath,
+		CollectedNamespaces: []string{collectOpts.envoyGatewayNamespace},
+	}
+	if len(proxyNamespaces) > 0 {
+		// sets.List returns a sorted slice, keeping the collection order deterministic.
+		opts.CollectedNamespaces = append(opts.CollectedNamespaces, sets.List(proxyNamespaces)...)
+	}
+	result := tb.CollectResult(ctx, restConfig, opts)
+	return result.ArchiveBundle(bundlePath, fmt.Sprintf("%s.tar.gz", basename))
 }
 
 func waitForSignal(c context.Context, cancel context.CancelFunc) {
diff --git a/internal/cmd/egctl/root.go b/internal/cmd/egctl/root.go
index 7143dc1e06..f94f8e23b3 100644
--- a/internal/cmd/egctl/root.go
+++ b/internal/cmd/egctl/root.go
@@ -5,11 +5,17 @@
 
 package egctl
 
-import "github.com/spf13/cobra"
+import (
+	"github.com/go-logr/logr"
+	"github.com/spf13/cobra"
+	"k8s.io/klog/v2"
+)
 
 // GetRootCommand returns the root cobra command to be executed
 // by egctl main.
 func GetRootCommand() *cobra.Command {
+	// discard klog logger
+	klog.SetLogger(logr.Discard())
 	rootCmd := &cobra.Command{
 		Use:               "egctl",
 		Long:              "A command line utility for operating Envoy Gateway",
diff --git a/internal/troubleshoot/collect.go b/internal/troubleshoot/collect.go
index 9e7b6dc9ec..00a80813bd 100644
--- a/internal/troubleshoot/collect.go
+++ b/internal/troubleshoot/collect.go
@@ -16,7 +16,18 @@ import (
 	"github.com/envoyproxy/gateway/internal/troubleshoot/collect"
 )
 
-func CollectResult(ctx context.Context, restConfig *rest.Config, bundlePath, egNamespace string) tbcollect.CollectorResult {
+// CollectOptions configures a CollectResult run: CollectedNamespaces lists the
+// namespaces the per-namespace collectors are created for, and BundlePath is the
+// bundle path handed to every collector.
+type CollectOptions struct {
+	CollectedNamespaces []string
+	BundlePath          string
+}
+
+// CollectResult sets up one collector for the Gateway API and Envoy Gateway custom
+// resources plus, for each namespace in opts.CollectedNamespaces, collectors for
+// resources, pod logs, Prometheus metrics and config dumps.
+func CollectResult(ctx context.Context, restConfig *rest.Config, opts CollectOptions) tbcollect.CollectorResult {
 	var result tbcollect.CollectorResult
 
 	progressChan := make(chan interface{})
@@ -33,40 +44,44 @@ func CollectResult(ctx context.Context, restConfig *rest.Config, bundlePath, egN
 		// Collect the custom resources from Gateway API and EG
 		collect.CustomResource{
 			ClientConfig: restConfig,
-			BundlePath:   bundlePath,
+			BundlePath:   opts.BundlePath,
 			IncludeGroups: []string{
 				"gateway.envoyproxy.io",
 				"gateway.networking.k8s.io",
 			},
 		},
-		// Collect resources from EnvoyGateway system namespace
-		collect.EnvoyGatewayResource{
-			ClientConfig: restConfig,
-			BundlePath:   bundlePath,
-			Namespace:    egNamespace,
-		},
-		// Collect logs from EnvoyGateway system namespace
-		&tbcollect.CollectLogs{
-			Collector: &troubleshootv1b2.Logs{
-				Name:      "pod-logs",
-				Namespace: egNamespace,
+	}
+	for _, ns := range opts.CollectedNamespaces {
+		bundlePath := opts.BundlePath
+		collectors = append(collectors,
+			// Collect resources from the namespace
+			collect.EnvoyGatewayResource{
+				ClientConfig: restConfig,
+				BundlePath:   bundlePath,
+				Namespace:    ns,
 			},
-			ClientConfig: restConfig,
-			BundlePath:   bundlePath,
-			Context:      ctx,
-		},
-		// Collect prometheus metrics from EnvoyGateway system namespace
-		collect.PrometheusMetric{
-			BundlePath:   bundlePath,
-			ClientConfig: restConfig,
-			Namespace:    egNamespace,
-		},
-		// Collect config dump from EnvoyGateway system namespace
-		collect.ConfigDump{
-			BundlePath:   bundlePath,
-			ClientConfig: restConfig,
-			Namespace:    egNamespace,
-		},
+			// Collect pod logs from the namespace
+			&tbcollect.CollectLogs{
+				Collector: &troubleshootv1b2.Logs{
+					Name:      "pod-logs",
+					Namespace: ns,
+				},
+				ClientConfig: restConfig,
+				BundlePath:   bundlePath,
+				Context:      ctx,
+			},
+			// Collect prometheus metrics from the namespace
+			collect.PrometheusMetric{
+				BundlePath:   bundlePath,
+				ClientConfig: restConfig,
+				Namespace:    ns,
+			},
+			// Collect config dump from the namespace
+			collect.ConfigDump{
+				BundlePath:   bundlePath,
+				ClientConfig: restConfig,
+				Namespace:    ns,
+			})
 	}
 	total := len(collectors)
 	allCollectedData := make(map[string][]byte)
diff --git a/internal/troubleshoot/collect/config_dump.go b/internal/troubleshoot/collect/config_dump.go
index 7a5c9b3834..e54850e61e 100644
--- a/internal/troubleshoot/collect/config_dump.go
+++ b/internal/troubleshoot/collect/config_dump.go
@@ -81,8 +81,7 @@ func (cd ConfigDump) Collect(_ chan<- interface{}) (tbcollect.CollectorResult, e
 			continue
 		}
 
-		k := fmt.Sprintf("%s-%s.json", pod.Namespace, pod.Name)
-		_ = output.SaveResult(cd.BundlePath, path.Join("config-dumps", k), bytes.NewBuffer(data))
+		_ = output.SaveResult(cd.BundlePath, path.Join("config-dumps", pod.Namespace, fmt.Sprintf("%s.json", pod.Name)), bytes.NewBuffer(data))
 	}
 	if len(logs) > 0 {
 		_ = output.SaveResult(cd.BundlePath, path.Join("config-dumps", "errors.log"), marshalErrors(logs))
diff --git a/internal/troubleshoot/collect/prometheus_metrics.go b/internal/troubleshoot/collect/prometheus_metrics.go
index 9f659a54a2..f8cbc10b29 100644
--- a/internal/troubleshoot/collect/prometheus_metrics.go
+++ b/internal/troubleshoot/collect/prometheus_metrics.go
@@ -25,6 +25,7 @@ import (
 	"k8s.io/client-go/rest"
 
 	kube "github.com/envoyproxy/gateway/internal/kubernetes"
+	"github.com/envoyproxy/gateway/internal/utils/str"
 )
 
 var _ tbcollect.Collector = &PrometheusMetric{}
@@ -76,32 +77,24 @@ func (p PrometheusMetric) Collect(_ chan<- interface{}) (tbcollect.CollectorResu
 
 	logs := make([]string, 0)
 	for _, pod := range pods {
-		annos := pod.GetAnnotations()
-		if v, ok := annos["prometheus.io/scrape"]; !ok || v != "true" {
-			logs = append(logs, fmt.Sprintf("pod %s/%s is skipped because of missing annotation prometheus.io/scrape", pod.Namespace, pod.Name))
-			continue
-		}
-		nn, port, reqPath := types.NamespacedName{Namespace: pod.Namespace, Name: pod.Name}, 19001, "/metrics"
-		if v, ok := annos["prometheus.io/port"]; !ok {
-			port, err = strconv.Atoi(v)
-			if err != nil {
-				logs = append(logs, fmt.Sprintf("pod %s/%s is skipped because of invalid prometheus.io/port", pod.Namespace, pod.Name))
-				continue
-			}
+		scrape, reqPath, port, err := getPrometheusPathAndPort(&pod)
+		if err != nil {
+			logs = append(logs, fmt.Sprintf("pod %s/%s is skipped because of err: %v", pod.Namespace, pod.Name, err))
+			continue
 		}
 
-		if v, ok := annos["prometheus.io/path"]; ok {
-			reqPath = v
+		if !scrape {
+			logs = append(logs, fmt.Sprintf("pod %s/%s is skipped because annotation prometheus.io/scrape is not true", pod.Namespace, pod.Name))
+			continue
 		}
 
-		data, err := RequestWithPortForwarder(cliClient, nn, port, reqPath)
+		data, err := RequestWithPortForwarder(cliClient, types.NamespacedName{Name: pod.Name, Namespace: pod.Namespace}, port, reqPath)
 		if err != nil {
-			logs = append(logs, fmt.Sprintf("pod %s/%s is skipped because of err: %v", pod.Namespace, pod.Name, err))
+			logs = append(logs, fmt.Sprintf("pod %s/%s:%v%s is skipped because of err: %v", pod.Namespace, pod.Name, port, reqPath, err))
 			continue
 		}
 
-		k := fmt.Sprintf("%s-%s.prom", pod.Namespace, pod.Name)
-		_ = output.SaveResult(p.BundlePath, path.Join("prometheus-metrics", k), bytes.NewBuffer(data))
+		_ = output.SaveResult(p.BundlePath, path.Join("prometheus-metrics", pod.Namespace, fmt.Sprintf("%s.prom", pod.Name)), bytes.NewBuffer(data))
 	}
 	if len(logs) > 0 {
 		_ = output.SaveResult(p.BundlePath, path.Join("prometheus-metrics", "error.log"), bytes.NewBuffer([]byte(strings.Join(logs, "\n"))))
@@ -110,6 +103,44 @@ func (p PrometheusMetric) Collect(_ chan<- interface{}) (tbcollect.CollectorResu
 	return output, nil
 }
 
+// getPrometheusPathAndPort reads the Prometheus scrape annotations of pod.
+// Keys are matched after str.SanitizeLabelName, so "prometheus.io/scrape" is
+// seen as "prometheus_io_scrape". It returns whether scraping is enabled, the
+// request path (default "/metrics") and the port (default 9090). A pod whose
+// scrape annotation is absent or not "true" yields (false, "", 0, nil); an error
+// is returned only when scraping is enabled and the port is not an integer.
+func getPrometheusPathAndPort(pod *corev1.Pod) (bool, string, int, error) {
+	reqPath := "/metrics"
+	port := 9090
+	scrape := false
+	var portErr error
+	for k, v := range pod.GetAnnotations() {
+		switch str.SanitizeLabelName(k) {
+		case "prometheus_io_scrape":
+			scrape = v == "true"
+		case "prometheus_io_port":
+			p, err := strconv.Atoi(v)
+			if err != nil {
+				portErr = fmt.Errorf("failed to parse port from annotation: %w", err)
+				continue
+			}
+			port = p
+		case "prometheus_io_path":
+			reqPath = v
+		}
+	}
+
+	// Decide only after the loop: map iteration order is unspecified, so returning
+	// from inside it would make the result depend on which annotation came first.
+	if !scrape {
+		return false, "", 0, nil
+	}
+	if portErr != nil {
+		return false, "", 0, portErr
+	}
+	return true, reqPath, port, nil
+}
+
 func listPods(ctx context.Context, client kubernetes.Interface, namespace string, selector labels.Selector) ([]corev1.Pod, error) {
 	pods, err := client.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{
 		LabelSelector: selector.String(),
diff --git a/internal/utils/str/strconv.go b/internal/utils/str/strconv.go
new file mode 100644
index 0000000000..3e5db95f40
--- /dev/null
+++ b/internal/utils/str/strconv.go
@@ -0,0 +1,22 @@
+// Copyright Envoy Gateway Authors
+// SPDX-License-Identifier: Apache-2.0
+// The full text of the Apache license is available in the LICENSE file at
+// the root of the repo.
+
+package str
+
+// This file contains utility functions copied from github.com/prometheus/prometheus/util/strutil,
+// which is conflicting with the current package.
+
+import "regexp"
+
+var invalidLabelCharRE = regexp.MustCompile(`[^a-zA-Z0-9_]`)
+
+// SanitizeLabelName replaces anything that doesn't match
+// client_label.LabelNameRE with an underscore.
+// Note: this does not handle all Prometheus label name restrictions (such as
+// not starting with a digit 0-9), and hence should only be used if the label
+// name is prefixed with a known valid string.
+func SanitizeLabelName(name string) string {
+	return invalidLabelCharRE.ReplaceAllString(name, "_")
+}
diff --git a/test/e2e/tests/utils.go b/test/e2e/tests/utils.go
index 1a628a2155..7f6fb996ba 100644
--- a/test/e2e/tests/utils.go
+++ b/test/e2e/tests/utils.go
@@ -602,7 +602,15 @@ type LokiQueryResponse struct {
 // CollectAndDump collects and dumps the cluster data for troubleshooting and log.
 // This function should be call within t.Cleanup.
 func CollectAndDump(t *testing.T, rest *rest.Config) {
-	result := tb.CollectResult(context.TODO(), rest, "", "envoy-gateway-system")
+	dumpedNamespaces := []string{"envoy-gateway-system"}
+	if IsGatewayNamespaceMode() {
+		dumpedNamespaces = append(dumpedNamespaces, ConformanceInfraNamespace)
+	}
+
+	result := tb.CollectResult(context.TODO(), rest, tb.CollectOptions{
+		BundlePath:          "",
+		CollectedNamespaces: dumpedNamespaces,
+	})
 	for r, data := range result {
 		tlog.Logf(t, "\nfilename: %s", r)
 		tlog.Logf(t, "\ndata: \n%s", data)