diff --git a/.github/workflows/publish-images.yaml b/.github/workflows/publish-images.yaml index 3c5fa49108..3c705c6e9e 100644 --- a/.github/workflows/publish-images.yaml +++ b/.github/workflows/publish-images.yaml @@ -47,7 +47,7 @@ jobs: for platform in $(echo $PLATFORMS | tr "," "\n"); do arch=${platform#*/} echo "Building manager for $arch" - make manager ARCH=$arch + make manager must-gather ARCH=$arch done - name: Docker meta diff --git a/Dockerfile b/Dockerfile index 3ab8a0336d..48104e6917 100644 --- a/Dockerfile +++ b/Dockerfile @@ -15,6 +15,7 @@ COPY --from=certificates /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-ce # Copy binary built on the host COPY bin/manager_${TARGETARCH} manager +COPY bin/must-gather_${TARGETARCH} must-gather USER 65532:65532 diff --git a/Makefile b/Makefile index 19e7d3ccd5..40d4b70016 100644 --- a/Makefile +++ b/Makefile @@ -140,6 +140,9 @@ ci: generate fmt vet test ensure-generate-is-noop manager: generate CGO_ENABLED=0 GOOS=$(GOOS) GOARCH=$(ARCH) go build -o bin/manager_${ARCH} -ldflags "${COMMON_LDFLAGS} ${OPERATOR_LDFLAGS}" main.go +must-gather: + CGO_ENABLED=0 GOOS=$(GOOS) GOARCH=$(ARCH) go build -o bin/must-gather_${ARCH} -ldflags "${COMMON_LDFLAGS} ${OPERATOR_LDFLAGS}" ./cmd/gather/main.go + # Build target allocator binary .PHONY: targetallocator targetallocator: @@ -328,7 +331,7 @@ scorecard-tests: operator-sdk # buildx is used to ensure same results for arm based systems (m1/2 chips) .PHONY: container container: GOOS = linux -container: manager +container: manager must-gather docker build -t ${IMG} . # Push the container image, used only for local dev purposes diff --git a/cmd/gather/cluster/cluster.go b/cmd/gather/cluster/cluster.go new file mode 100644 index 0000000000..226db2d9f4 --- /dev/null +++ b/cmd/gather/cluster/cluster.go @@ -0,0 +1,317 @@ +package cluster + +import ( + "context" + "fmt" + "log" + "os" + "path/filepath" + + otelv1alpha1 "github.com/open-telemetry/opentelemetry-operator/apis/v1alpha1" + otelv1beta1 "github.com/open-telemetry/opentelemetry-operator/apis/v1beta1" + "github.com/open-telemetry/opentelemetry-operator/cmd/gather/config" + routev1 "github.com/openshift/api/route/v1" + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + appsv1 "k8s.io/api/apps/v1" + autoscalingv2 "k8s.io/api/autoscaling/v2" + corev1 "k8s.io/api/core/v1" + networkingv1 "k8s.io/api/networking/v1" + policy1 "k8s.io/api/policy/v1" + rbacv1 "k8s.io/api/rbac/v1" + "k8s.io/apimachinery/pkg/labels" + + "sigs.k8s.io/controller-runtime/pkg/client" +) + +type Cluster struct { + config *config.Config +} + +func NewCluster(cfg *config.Config) Cluster { + return Cluster{config: cfg} +} + +func (c *Cluster) GetOpenTelemetryCollectors() error { + otelCols := otelv1beta1.OpenTelemetryCollectorList{} + + err := c.config.KubernetesClient.List(context.TODO(), &otelCols, &client.ListOptions{}) + if err != nil { + return err + } + + log.Println("OpenTelemetryCollectors found:", len(otelCols.Items)) + + errorDetected := false + + for _, otelCol := range otelCols.Items { + err := c.processOTELCollector(&otelCol) + if err != nil { + log.Fatalln(err) + errorDetected = true + } + } + + if errorDetected { + return fmt.Errorf("something failed while getting the opentelemtrycollectors") + } + return nil +} + +func (c *Cluster) GetInstrumentations() error { + instrumentations := otelv1alpha1.InstrumentationList{} + + err := c.config.KubernetesClient.List(context.TODO(), &instrumentations, &client.ListOptions{}) + if err != nil { + return err + } + + log.Println("Instrumentations found:", len(instrumentations.Items)) + + errorDetected := false + + for _, instr := range instrumentations.Items { + outputDir := filepath.Join(c.config.CollectionDir, instr.Namespace) + err := os.MkdirAll(outputDir, os.ModePerm) + if err != nil { + log.Fatalln(err) + errorDetected = true + continue + } + + writeToFile(outputDir, &instr) + + if err != nil { + + } + } + + if errorDetected { + return fmt.Errorf("something failed while getting the opentelemtrycollectors") + } + return nil +} + +func (c *Cluster) processOTELCollector(otelCol *otelv1beta1.OpenTelemetryCollector) error { + log.Printf("Processing OpenTelemetryCollector %s/%s", otelCol.Namespace, otelCol.Name) + folder, err := createFolder(c.config.CollectionDir, otelCol) + if err != nil { + return err + } + writeToFile(folder, otelCol) + + err = c.processOwnedResources(otelCol) + if err != nil { + return err + } + + return nil +} + +func (c *Cluster) processOwnedResources(otelCol *otelv1beta1.OpenTelemetryCollector) error { + folder, err := createFolder(c.config.CollectionDir, otelCol) + if err != nil { + return err + } + errorDetected := false + + // ClusterRole + crs := rbacv1.ClusterRoleList{} + err = c.getOwnerResources(&crs, otelCol) + if err != nil { + errorDetected = true + log.Fatalln(err) + } + for _, cr := range crs.Items { + writeToFile(folder, &cr) + } + + // ClusterRoleBindings + crbs := rbacv1.ClusterRoleBindingList{} + err = c.getOwnerResources(&crbs, otelCol) + if err != nil { + errorDetected = true + log.Fatalln(err) + } + for _, crb := range crbs.Items { + writeToFile(folder, &crb) + } + + // ConfigMaps + cms := corev1.ConfigMapList{} + err = c.getOwnerResources(&cms, otelCol) + if err != nil { + errorDetected = true + log.Fatalln(err) + } + for _, c := range cms.Items { + writeToFile(folder, &c) + } + + // DaemonSets + daemonsets := appsv1.DaemonSetList{} + err = c.getOwnerResources(&daemonsets, otelCol) + if err != nil { + errorDetected = true + log.Fatalln(err) + } + for _, d := range daemonsets.Items { + writeToFile(folder, &d) + } + + // Deployments + deployments := appsv1.DeploymentList{} + err = c.getOwnerResources(&deployments, otelCol) + if err != nil { + errorDetected = true + log.Fatalln(err) + } + for _, d := range deployments.Items { + writeToFile(folder, &d) + } + + // HPAs + hpas := autoscalingv2.HorizontalPodAutoscalerList{} + err = c.getOwnerResources(&hpas, otelCol) + if err != nil { + errorDetected = true + log.Fatalln(err) + } + for _, h := range hpas.Items { + writeToFile(folder, &h) + } + + // Ingresses + ingresses := networkingv1.IngressList{} + err = c.getOwnerResources(&ingresses, otelCol) + if err != nil { + errorDetected = true + log.Fatalln(err) + } + for _, i := range ingresses.Items { + writeToFile(folder, &i) + } + + // PersistentVolumes + pvs := corev1.PersistentVolumeList{} + err = c.getOwnerResources(&pvs, otelCol) + if err != nil { + errorDetected = true + log.Fatalln(err) + } + for _, p := range pvs.Items { + writeToFile(folder, &p) + } + + // PersistentVolumeClaims + pvcs := corev1.PersistentVolumeClaimList{} + err = c.getOwnerResources(&pvcs, otelCol) + if err != nil { + errorDetected = true + log.Fatalln(err) + } + for _, p := range pvcs.Items { + writeToFile(folder, &p) + } + + // PodDisruptionBudget + pdbs := policy1.PodDisruptionBudgetList{} + err = c.getOwnerResources(&pdbs, otelCol) + if err != nil { + errorDetected = true + log.Fatalln(err) + } + for _, pdb := range pdbs.Items { + writeToFile(folder, &pdb) + } + + // PodMonitors + pms := monitoringv1.PodMonitorList{} + err = c.getOwnerResources(&pms, otelCol) + if err != nil { + errorDetected = true + log.Fatalln(err) + } + for _, pm := range pms.Items { + writeToFile(folder, pm) + } + + // Routes + rs := routev1.RouteList{} + err = c.getOwnerResources(&rs, otelCol) + if err != nil { + errorDetected = true + log.Fatalln(err) + } + for _, r := range rs.Items { + writeToFile(folder, &r) + } + + // Services + services := corev1.ServiceList{} + err = c.getOwnerResources(&services, otelCol) + if err != nil { + errorDetected = true + log.Fatalln(err) + } + for _, s := range services.Items { + writeToFile(folder, &s) + } + + // ServiceMonitors + sms := monitoringv1.ServiceMonitorList{} + err = c.getOwnerResources(&sms, otelCol) + if err != nil { + errorDetected = true + log.Fatalln(err) + } + for _, s := range sms.Items { + writeToFile(folder, s) + } + + // ServiceAccounts + sas := corev1.ServiceAccountList{} + err = c.getOwnerResources(&sas, otelCol) + if err != nil { + errorDetected = true + log.Fatalln(err) + } + for _, s := range sas.Items { + writeToFile(folder, &s) + } + + // StatefulSets + statefulsets := appsv1.StatefulSetList{} + err = c.getOwnerResources(&statefulsets, otelCol) + if err != nil { + errorDetected = true + log.Fatalln(err) + } + for _, s := range statefulsets.Items { + writeToFile(folder, &s) + } + + if errorDetected { + return fmt.Errorf("something failed while getting the associated resources") + } + + return nil +} + +func (c *Cluster) getOwnerResources(objList client.ObjectList, otelCol *otelv1beta1.OpenTelemetryCollector) error { + return c.config.KubernetesClient.List(context.TODO(), objList, &client.ListOptions{ + LabelSelector: labels.SelectorFromSet(labels.Set{ + "app.kubernetes.io/instance": fmt.Sprintf("%s.%s", otelCol.Namespace, otelCol.Name), + "app.kubernetes.io/managed-by": "opentelemetry-operator", + "app.kubernetes.io/part-of": "opentelemetry", + }), + }) +} + +func hasOwnerReference(obj client.Object, otelCol *otelv1beta1.OpenTelemetryCollector) bool { + for _, ownerRef := range obj.GetOwnerReferences() { + if ownerRef.Kind == otelCol.Kind && ownerRef.UID == otelCol.UID { + return true + } + } + return false +} diff --git a/cmd/gather/cluster/write.go b/cmd/gather/cluster/write.go new file mode 100644 index 0000000000..f8d68beee2 --- /dev/null +++ b/cmd/gather/cluster/write.go @@ -0,0 +1,67 @@ +package cluster + +import ( + "fmt" + "log" + "os" + "path/filepath" + "reflect" + "strings" + + "github.com/open-telemetry/opentelemetry-operator/apis/v1beta1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/serializer/json" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +func createFolder(collectionDir string, otelCol *v1beta1.OpenTelemetryCollector) (string, error) { + outputDir := filepath.Join(collectionDir, otelCol.Namespace, otelCol.Name) + err := os.MkdirAll(outputDir, os.ModePerm) + if err != nil { + return "", err + } + return outputDir, nil +} + +func createFile(outputDir string, obj client.Object) (*os.File, error) { + kind := obj.GetObjectKind().GroupVersionKind().Kind + + if kind == "" { + // reflect.TypeOf(obj) will return something like *v1.Deployment. We remove the first part + prefix, typeName, found := strings.Cut(reflect.TypeOf(obj).String(), ".") + if found { + kind = typeName + } else { + kind = prefix + } + } + + kind = strings.ToLower(kind) + + path := filepath.Join(outputDir, fmt.Sprintf("%s-%s.yaml", kind, obj.GetName())) + return os.Create(path) +} + +func writeToFile(outputDir string, o client.Object) { + // Open or create the file for writing + outputFile, err := createFile(outputDir, o) + if err != nil { + log.Fatalf("Failed to create file: %v", err) + } + defer outputFile.Close() + + unstructuredDeployment, err := runtime.DefaultUnstructuredConverter.ToUnstructured(o) + if err != nil { + log.Fatalf("Error converting deployment to unstructured: %v", err) + } + + unstructuredObj := &unstructured.Unstructured{Object: unstructuredDeployment} + + // Serialize the unstructured object to YAML + serializer := json.NewYAMLSerializer(json.DefaultMetaFactory, nil, nil) + err = serializer.Encode(unstructuredObj, outputFile) + if err != nil { + log.Fatalf("Error encoding to YAML: %v", err) + } +} diff --git a/cmd/gather/config/config.go b/cmd/gather/config/config.go new file mode 100644 index 0000000000..6008b3f260 --- /dev/null +++ b/cmd/gather/config/config.go @@ -0,0 +1,41 @@ +package config + +import ( + "fmt" + "path/filepath" + + "github.com/spf13/pflag" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/tools/clientcmd" + "k8s.io/client-go/util/homedir" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +type Config struct { + CollectionDir string + KubernetesClient client.Client +} + +func NewConfig(scheme *runtime.Scheme) (Config, error) { + var kubeconfigPath string + var collectionDir string + + pflag.StringVar(&kubeconfigPath, "kubeconfig-path", filepath.Join(homedir.HomeDir(), ".kube", "config"), "Absolute path to the KubeconfigPath file") + pflag.StringVar(&collectionDir, "collection-dir", filepath.Join(homedir.HomeDir(), "must-gather"), "Absolute path to the KubeconfigPath file") + pflag.Parse() + + config, err := clientcmd.BuildConfigFromFlags("", kubeconfigPath) + if err != nil { + return Config{}, fmt.Errorf("Error reading the kubeconfig: %s\n", err.Error()) + } + + clusterClient, err := client.New(config, client.Options{Scheme: scheme}) + if err != nil { + return Config{}, fmt.Errorf("Creating the Kubernetes client: %s\n", err) + } + + return Config{ + CollectionDir: collectionDir, + KubernetesClient: clusterClient, + }, nil +} diff --git a/cmd/gather/main.go b/cmd/gather/main.go new file mode 100644 index 0000000000..d543669b1b --- /dev/null +++ b/cmd/gather/main.go @@ -0,0 +1,49 @@ +package main + +import ( + "fmt" + "os" + + otelv1alpha1 "github.com/open-telemetry/opentelemetry-operator/apis/v1alpha1" + otelv1beta1 "github.com/open-telemetry/opentelemetry-operator/apis/v1beta1" + "github.com/open-telemetry/opentelemetry-operator/cmd/gather/cluster" + "github.com/open-telemetry/opentelemetry-operator/cmd/gather/config" + routev1 "github.com/openshift/api/route/v1" + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + appsv1 "k8s.io/api/apps/v1" + autoscalingv2 "k8s.io/api/autoscaling/v2" + corev1 "k8s.io/api/core/v1" + networkingv1 "k8s.io/api/networking/v1" + policyV1 "k8s.io/api/policy/v1" + rbacv1 "k8s.io/api/rbac/v1" + k8sruntime "k8s.io/apimachinery/pkg/runtime" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" +) + +var scheme *k8sruntime.Scheme + +func init() { + scheme = k8sruntime.NewScheme() + utilruntime.Must(otelv1alpha1.AddToScheme(scheme)) + utilruntime.Must(otelv1beta1.AddToScheme(scheme)) + utilruntime.Must(appsv1.AddToScheme(scheme)) + utilruntime.Must(corev1.AddToScheme(scheme)) + utilruntime.Must(networkingv1.AddToScheme(scheme)) + utilruntime.Must(autoscalingv2.AddToScheme(scheme)) + utilruntime.Must(rbacv1.AddToScheme(scheme)) + utilruntime.Must(policyV1.AddToScheme(scheme)) + utilruntime.Must(monitoringv1.AddToScheme(scheme)) + utilruntime.Must(routev1.AddToScheme(scheme)) +} + +func main() { + config, err := config.NewConfig(scheme) + if err != nil { + fmt.Println(err) + os.Exit(1) + } + + cluster := cluster.NewCluster(&config) + cluster.GetOpenTelemetryCollectors() + cluster.GetInstrumentations() +}