Skip to content

Commit

Permalink
eBPF instrumentation manager (#1776)
Browse files Browse the repository at this point in the history
This PR is a follow-up to #1645.
Adding the new `Manager` which will eventually replace the `Director`.
The new design has the following key features and improvements:
1. Use the [new runtime-detector
module](https://github.com/odigos-io/runtime-detector) to trigger
instrument/un-instrument events. This replaces the current approach
which relies on a Pod reconciler. The main disadvantage of the Pod
reconciler approach shows up in scenarios with multiple containers in the
same Pod and multiple processes in the same container. Changing the trigger
to process creation allows us to guarantee that we won't miss a requested
instrumentation.
note: the `runtime-detector` is configured to filter process events and
will only pass events according to its configuration.
2. Event loop design. The current director has had many race conditions
(some already fixed, others still possible) due to the concurrent nature
of processes being created/exiting and Pod events arriving from the
reconciler. The new `Manager` does not use locks; it relies on an internal
event loop instead.
3. Configuration updates are triggered by the `InstrumentationConfig`
reconciler (same as before) - those updates will be handled in the event
loop.
4. The `Factory` interface is refactored and a `Settings` option can be
expanded in the future to add more initial configuration options.
5. The `Instrumentation` interface is introduced and will replace
`OtelEbpfSdk`.
6. Update `go.opentelemetry.io/auto` to `v0.18.0-alpha`.

This change currently applies only to OSS Go instrumentation.
  • Loading branch information
RonFed authored Dec 4, 2024
1 parent 3d436ce commit 5d612b2
Show file tree
Hide file tree
Showing 14 changed files with 793 additions and 275 deletions.
21 changes: 21 additions & 0 deletions k8sutils/pkg/container/container.go
Original file line number Diff line number Diff line change
@@ -1,12 +1,33 @@
package container

import (
"errors"
"strings"

"github.com/odigos-io/odigos/common"
v1 "k8s.io/api/core/v1"
)

var (
ErrDeviceNotDetected = errors.New("device not detected")
ErrContainerNotInPodSpec = errors.New("container not found in pod spec")
)

// LanguageSdkFromPodContainer looks up the container named containerName in
// pod.Spec.Containers and returns the programming language and OTel SDK that
// GetLanguageAndOtelSdk reports for it.
//
// It returns ErrDeviceNotDetected when the container exists but
// GetLanguageAndOtelSdk reports nothing found, and ErrContainerNotInPodSpec
// when no entry of pod.Spec.Containers carries the requested name.
func LanguageSdkFromPodContainer(pod *v1.Pod, containerName string) (common.ProgrammingLanguage, common.OtelSdk, error) {
	containers := pod.Spec.Containers
	for i := range containers {
		if containers[i].Name != containerName {
			continue
		}

		// First container with a matching name decides the outcome.
		lang, otelSdk, ok := GetLanguageAndOtelSdk(containers[i])
		if ok {
			return lang, otelSdk, nil
		}
		return common.UnknownProgrammingLanguage, common.OtelSdk{}, ErrDeviceNotDetected
	}

	return common.UnknownProgrammingLanguage, common.OtelSdk{}, ErrContainerNotInPodSpec
}

func GetLanguageAndOtelSdk(container v1.Container) (common.ProgrammingLanguage, common.OtelSdk, bool) {
deviceName := podContainerDeviceName(container)
if deviceName == nil {
Expand Down
41 changes: 40 additions & 1 deletion k8sutils/pkg/workload/ownerreference.go
Original file line number Diff line number Diff line change
@@ -1,16 +1,55 @@
package workload

import (
"context"
"errors"
"fmt"
"strings"

corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// PodWorkloadObjectOrError behaves like PodWorkloadObject, except that a pod
// for which no workload is resolved is reported as an error instead of a nil
// workload. Errors returned by PodWorkloadObject are passed through unchanged.
func PodWorkloadObjectOrError(ctx context.Context, pod *corev1.Pod) (*PodWorkload, error) {
	switch workload, err := PodWorkloadObject(ctx, pod); {
	case err != nil:
		return nil, err
	case workload == nil:
		// The lookup succeeded but found no owner; callers of this variant want an error.
		return nil, fmt.Errorf("workload not found for pod %s/%s", pod.Namespace, pod.Name)
	default:
		return workload, nil
	}
}

// PodWorkloadObject returns the workload (name, kind and namespace) derived
// from the first owner reference of the pod that resolves to a workload.
// Owner references whose kind yields ErrKindNotSupported are skipped.
// If no owner reference resolves, it returns a nil workload with no error.
func PodWorkloadObject(ctx context.Context, pod *corev1.Pod) (*PodWorkload, error) {
	for _, ownerRef := range pod.OwnerReferences {
		name, kind, err := GetWorkloadFromOwnerReference(ownerRef)
		switch {
		case err == nil:
			// The workload is reported in the pod's own namespace.
			return &PodWorkload{
				Name:      name,
				Kind:      kind,
				Namespace: pod.Namespace,
			}, nil
		case errors.Is(err, ErrKindNotSupported):
			// Unsupported owner kind: try the next owner reference.
			continue
		default:
			// NOTE(review): ErrKindNotSupported is already handled above, so this
			// helper (defined elsewhere) presumably passes err through — confirm.
			return nil, IgnoreErrorKindNotSupported(err)
		}
	}

	// A pod does not have to be managed by a controller.
	return nil, nil
}

// GetWorkloadFromOwnerReference resolves the workload name and workload kind
// for an owner reference by passing its Name and Kind to
// GetWorkloadNameAndKind, and returns that call's results unchanged.
func GetWorkloadFromOwnerReference(ownerReference metav1.OwnerReference) (workloadName string, workloadKind WorkloadKind, err error) {
	workloadName, workloadKind, err = GetWorkloadNameAndKind(ownerReference.Name, ownerReference.Kind)
	return workloadName, workloadKind, err
}

Expand Down
76 changes: 35 additions & 41 deletions odiglet/cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,25 +6,22 @@ import (
"os"
"sync"

detector "github.com/odigos-io/odigos/odiglet/pkg/detector"
"github.com/odigos-io/odigos/odiglet/pkg/ebpf"
"github.com/odigos-io/odigos/odiglet/pkg/ebpf/sdks"
"github.com/odigos-io/odigos/odiglet/pkg/instrumentation/fs"

"github.com/kubevirt/device-plugin-manager/pkg/dpm"
"github.com/odigos-io/odigos/common"
k8senv "github.com/odigos-io/odigos/k8sutils/pkg/env"
"github.com/odigos-io/odigos/odiglet/pkg/ebpf"
"github.com/odigos-io/odigos/odiglet/pkg/env"
"github.com/odigos-io/odigos/odiglet/pkg/instrumentation"
"github.com/odigos-io/odigos/odiglet/pkg/instrumentation/instrumentlang"
"github.com/odigos-io/odigos/odiglet/pkg/kube"
"github.com/odigos-io/odigos/odiglet/pkg/log"
"github.com/odigos-io/odigos/opampserver/pkg/server"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/manager/signals"

_ "net/http/pprof"
Expand All @@ -45,8 +42,8 @@ func odigletInitPhase() {
// odiglet groups the dependencies that newOdiglet builds and that run uses
// to start the odiglet's background components.
type odiglet struct {
// clientset is the Kubernetes client handed to kube.SetupWithManager and to
// the OpAMP server.
clientset *kubernetes.Clientset
// mgr is the controller-runtime manager; run starts it with mgr.Start.
mgr ctrl.Manager
// ctx is the context passed to the components started by run.
ctx context.Context
// ebpfDirectors holds the eBPF directors; run calls Shutdown on each of them
// once the context is done.
ebpfDirectors ebpf.DirectorsMap
// ebpfManager is the eBPF instrumentation manager; run executes its Run
// method in a dedicated goroutine.
ebpfManager *ebpf.Manager
// configUpdates is the send-only channel obtained from
// ebpfManager.ConfigUpdates(). It is passed to kube.SetupWithManager and is
// closed by run after the kube manager has stopped.
configUpdates chan<- ebpf.ConfigUpdate
}

func newOdiglet() (*odiglet, error) {
Expand All @@ -66,34 +63,42 @@ func newOdiglet() (*odiglet, error) {
return nil, fmt.Errorf("Failed to create controller-runtime manager %w", err)
}

ctx := signals.SetupSignalHandler()

ebpfDirectors, err := initEbpf(ctx, mgr.GetClient(), mgr.GetScheme())
ebpfManager, err := ebpf.NewManager(
mgr.GetClient(),
log.Logger,
map[ebpf.OtelDistribution]ebpf.Factory{
ebpf.OtelDistribution{
Language: common.GoProgrammingLanguage,
OtelSdk: common.OtelSdkEbpfCommunity,
}: sdks.NewGoInstrumentationFactory(),
},
)
if err != nil {
return nil, fmt.Errorf("Failed to init eBPF director %w", err)
return nil, fmt.Errorf("Failed to create ebpf manager %w", err)
}

err = kube.SetupWithManager(mgr, ebpfDirectors, clientset)
configUpdates := ebpfManager.ConfigUpdates()
err = kube.SetupWithManager(mgr, nil, clientset, configUpdates)
if err != nil {
return nil, fmt.Errorf("Failed to setup controller-runtime manager %w", err)
}

return &odiglet{
clientset: clientset,
mgr: mgr,
ctx: ctx,
ebpfDirectors: ebpfDirectors,
ebpfManager: ebpfManager,
configUpdates: configUpdates,
}, nil
}

func (o *odiglet) run() {
func (o *odiglet) run(ctx context.Context) {
var wg sync.WaitGroup

// Start pprof server
wg.Add(1)
go func() {
defer wg.Done()
err := common.StartPprofServer(o.ctx, log.Logger)
err := common.StartPprofServer(ctx, log.Logger)
if err != nil {
log.Logger.Error(err, "Failed to start pprof server")
} else {
Expand All @@ -111,24 +116,22 @@ func (o *odiglet) run() {
log.Logger.V(0).Info("Device manager exited")
}()

procEvents := make(chan detector.ProcessEvent)
wg.Add(1)
go func() {
defer wg.Done()
err := detector.StartRuntimeDetector(o.ctx, log.Logger, procEvents)
err := o.ebpfManager.Run(ctx)
if err != nil {
log.Logger.Error(err, "Failed to start runtime detector")
os.Exit(-1)
log.Logger.Error(err, "Failed to run ebpf manager")
}
log.Logger.V(0).Info("Runtime detector exited")
log.Logger.V(0).Info("Ebpf manager exited")
}()

// start OpAmp server
odigosNs := k8senv.GetCurrentNamespace()
wg.Add(1)
go func() {
defer wg.Done()
err := server.StartOpAmpServer(o.ctx, log.Logger, o.mgr, o.clientset, env.Current.NodeName, odigosNs)
err := server.StartOpAmpServer(ctx, log.Logger, o.mgr, o.clientset, env.Current.NodeName, odigosNs)
if err != nil {
log.Logger.Error(err, "Failed to start opamp server")
}
Expand All @@ -139,17 +142,19 @@ func (o *odiglet) run() {
wg.Add(1)
go func() {
defer wg.Done()
err := o.mgr.Start(o.ctx)
err := o.mgr.Start(ctx)
if err != nil {
log.Logger.Error(err, "error starting kube manager")
} else {
log.Logger.V(0).Info("Kube manager exited")
}
// the manager is stopped, it is now safe to close the config updates channel
if o.configUpdates != nil {
close(o.configUpdates)
}
log.Logger.V(0).Info("Kube manager exited")
}()

<-o.ctx.Done()
for _, director := range o.ebpfDirectors {
director.Shutdown()
}
<-ctx.Done()
wg.Wait()
}

Expand All @@ -176,7 +181,9 @@ func main() {
log.Logger.Error(err, "Failed to initialize odiglet")
os.Exit(1)
}
o.run()

ctx := signals.SetupSignalHandler()
o.run(ctx)

log.Logger.V(0).Info("odiglet exiting")
}
Expand Down Expand Up @@ -216,16 +223,3 @@ func runDeviceManager(clientset *kubernetes.Clientset) {
manager := dpm.NewManager(lister)
manager.Run()
}

// initEbpf builds the map of eBPF directors used by the odiglet. It registers
// a single director for Go, keyed by the Go programming language together
// with the community eBPF OTel SDK. The returned error is always nil.
func initEbpf(ctx context.Context, client client.Client, scheme *runtime.Scheme) (ebpf.DirectorsMap, error) {
	key := ebpf.DirectorKey{
		Language: common.GoProgrammingLanguage,
		OtelSdk:  common.OtelSdkEbpfCommunity,
	}

	// The factory and the director share the same Kubernetes client.
	factory := sdks.NewGoInstrumentationFactory(client)
	directors := ebpf.DirectorsMap{
		key: ebpf.NewEbpfDirector(ctx, client, scheme, common.GoProgrammingLanguage, factory),
	}

	return directors, nil
}
67 changes: 34 additions & 33 deletions odiglet/go.mod
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module github.com/odigos-io/odigos/odiglet

go 1.22.0
go 1.22.7

require (
github.com/go-logr/logr v1.4.2
Expand All @@ -14,13 +14,14 @@ require (
github.com/odigos-io/odigos/opampserver v0.0.0
github.com/odigos-io/odigos/procdiscovery v0.0.0
github.com/odigos-io/opentelemetry-zap-bridge v0.0.5
github.com/odigos-io/runtime-detector v0.0.2
github.com/odigos-io/runtime-detector v0.0.3
github.com/stretchr/testify v1.10.0
go.opentelemetry.io/auto v0.17.0-alpha
go.opentelemetry.io/otel v1.31.0
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.31.0
go.opentelemetry.io/auto v0.18.0-alpha
go.opentelemetry.io/otel v1.32.0
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.32.0
go.uber.org/zap v1.27.0
google.golang.org/grpc v1.67.1
golang.org/x/sync v0.9.0
google.golang.org/grpc v1.68.0
k8s.io/api v0.31.0
k8s.io/apimachinery v0.31.0
k8s.io/client-go v0.31.0
Expand Down Expand Up @@ -52,7 +53,7 @@ require (
github.com/google/gnostic-models v0.6.8 // indirect
github.com/google/go-cmp v0.6.0 // indirect
github.com/google/gofuzz v1.2.0 // indirect
github.com/grpc-ecosystem/grpc-gateway/v2 v2.22.0 // indirect
github.com/grpc-ecosystem/grpc-gateway/v2 v2.23.0 // indirect
github.com/hashicorp/go-version v1.7.0 // indirect
github.com/imdario/mergo v0.3.6 // indirect
github.com/josharian/intern v1.0.0 // indirect
Expand All @@ -66,45 +67,45 @@ require (
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
github.com/prometheus/client_golang v1.20.4 // indirect
github.com/prometheus/client_golang v1.20.5 // indirect
github.com/prometheus/client_model v0.6.1 // indirect
github.com/prometheus/common v0.60.0 // indirect
github.com/prometheus/common v0.60.1 // indirect
github.com/prometheus/procfs v0.15.1 // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/x448/float16 v0.8.4 // indirect
go.opentelemetry.io/collector/pdata v1.18.0 // indirect
go.opentelemetry.io/contrib/bridges/prometheus v0.56.0 // indirect
go.opentelemetry.io/contrib/exporters/autoexport v0.56.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc v0.7.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp v0.7.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.31.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.31.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.31.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.31.0 // indirect
go.opentelemetry.io/otel/exporters/prometheus v0.53.0 // indirect
go.opentelemetry.io/otel/exporters/stdout/stdoutlog v0.7.0 // indirect
go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.31.0 // indirect
go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.31.0 // indirect
go.opentelemetry.io/otel/log v0.7.0 // indirect
go.opentelemetry.io/otel/metric v1.31.0 // indirect
go.opentelemetry.io/otel/sdk v1.31.0 // indirect
go.opentelemetry.io/otel/sdk/log v0.7.0 // indirect
go.opentelemetry.io/otel/sdk/metric v1.31.0 // indirect
go.opentelemetry.io/otel/trace v1.31.0 // indirect
go.opentelemetry.io/collector/pdata v1.19.0 // indirect
go.opentelemetry.io/contrib/bridges/prometheus v0.57.0 // indirect
go.opentelemetry.io/contrib/exporters/autoexport v0.57.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc v0.8.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp v0.8.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.32.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.32.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.32.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.32.0 // indirect
go.opentelemetry.io/otel/exporters/prometheus v0.54.0 // indirect
go.opentelemetry.io/otel/exporters/stdout/stdoutlog v0.8.0 // indirect
go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.32.0 // indirect
go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.32.0 // indirect
go.opentelemetry.io/otel/log v0.8.0 // indirect
go.opentelemetry.io/otel/metric v1.32.0 // indirect
go.opentelemetry.io/otel/sdk v1.32.0 // indirect
go.opentelemetry.io/otel/sdk/log v0.8.0 // indirect
go.opentelemetry.io/otel/sdk/metric v1.32.0 // indirect
go.opentelemetry.io/otel/trace v1.32.0 // indirect
go.opentelemetry.io/proto/otlp v1.3.1 // indirect
go.uber.org/multierr v1.11.0 // indirect
golang.org/x/arch v0.11.0 // indirect
golang.org/x/arch v0.12.0 // indirect
golang.org/x/exp v0.0.0-20230515195305-f3d0a9c9a5cc // indirect
golang.org/x/net v0.30.0 // indirect
golang.org/x/oauth2 v0.23.0 // indirect
golang.org/x/sys v0.26.0 // indirect
golang.org/x/sys v0.27.0 // indirect
golang.org/x/term v0.25.0 // indirect
golang.org/x/text v0.19.0 // indirect
golang.org/x/text v0.20.0 // indirect
golang.org/x/time v0.3.0 // indirect
golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect
gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20241007155032-5fefd90f89a9 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20241007155032-5fefd90f89a9 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20241104194629-dd2ea8efbc28 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20241104194629-dd2ea8efbc28 // indirect
google.golang.org/protobuf v1.35.1 // indirect
gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
Expand Down
Loading

0 comments on commit 5d612b2

Please sign in to comment.