Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

eBPF instrumentation manager #1776

Merged
merged 40 commits into from
Dec 4, 2024
Merged
Show file tree
Hide file tree
Changes from 39 commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
b947e22
[WIP] new instrumentations manager POC
RonFed Nov 5, 2024
e5e311b
Merge branch 'main' into instrumentation_manager
RonFed Nov 13, 2024
8239145
WIP
RonFed Nov 13, 2024
6ff8cc7
WIP
RonFed Nov 14, 2024
9a90a2d
Merge branch 'main' into instrumentation_manager
RonFed Nov 17, 2024
f4887a4
Add instrumentation instance updates and deletion to new manager
RonFed Nov 17, 2024
de9e868
Handle configuration updates and initial configuration
RonFed Nov 17, 2024
79f2469
Add documentation for the new Factory and Instrumentation interfaces
RonFed Nov 18, 2024
d826e97
remove un-used functions
RonFed Nov 23, 2024
d9178c8
Mark director as deprecated, only register the Pod reconciler if the …
RonFed Nov 24, 2024
b22a1ab
Merge branch 'main' into instrumentation_manager
RonFed Nov 24, 2024
0f2ce5a
Remove double error logging
RonFed Nov 24, 2024
dfd0b57
move instrumentationDetails to manager
RonFed Nov 24, 2024
748177a
add comments about settings
RonFed Nov 24, 2024
4afb623
Make proc events channel read only for manager
RonFed Nov 24, 2024
7a5c521
Make manager fields private
RonFed Nov 24, 2024
964288c
improve log message
RonFed Nov 24, 2024
7bbcf16
move process detected log message to debug level
RonFed Nov 24, 2024
76e918f
Make detector channel write-only
RonFed Nov 24, 2024
081c35c
Pass context to podFromProcEvent
RonFed Nov 24, 2024
549e93b
Add errRequiredEnvVarNotFound
RonFed Nov 24, 2024
8987c70
Add a map for instrumentations by workload, used for config updates
RonFed Nov 24, 2024
46bb2a3
Merge branch 'main' into instrumentation_manager
RonFed Nov 25, 2024
8e9f030
improve applyInstrumentationConfigurationForSDK readability
RonFed Nov 25, 2024
64ce1b4
move function from manager to k8sutils
RonFed Nov 25, 2024
ed29146
Add PodWorkloadObjectOrError wrapper
RonFed Nov 25, 2024
197f35b
Check nil SDK config for Go instrumentation and use default one if non…
RonFed Nov 25, 2024
159488d
Added comments and better nil handling
RonFed Nov 25, 2024
7fd76bc
Merge branch 'main' into instrumentation_manager
RonFed Nov 25, 2024
fb167f8
Update go.opentelemetry.io/auto to v0.18.0-alpha
RonFed Nov 25, 2024
7d585cf
Merge branch 'main' into instrumentation_manager
RonFed Nov 29, 2024
07a8948
Use a channel to pass config update events from the reconciler to t…
RonFed Nov 29, 2024
7b0d434
Use OtelDistribution terminology for different factory IDs
RonFed Nov 29, 2024
5c1bb8d
Make the config updates channel buffered with a default size of 10
RonFed Nov 29, 2024
fa9496a
Remove unnecessary wrapper around detector
RonFed Nov 29, 2024
de9052a
Merge branch 'main' into instrumentation_manager
RonFed Dec 2, 2024
dc25eba
Update runtime-detector to v0.0.3
RonFed Dec 2, 2024
f2f99d5
Add workloadConfigID to identify aggregate instrumentations for confi…
RonFed Dec 2, 2024
88017e8
Merge branch 'main' into instrumentation_manager
RonFed Dec 4, 2024
ffe5570
Merge branch 'main' into instrumentation_manager
RonFed Dec 4, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions k8sutils/pkg/container/container.go
Original file line number Diff line number Diff line change
@@ -1,12 +1,33 @@
package container

import (
"errors"
"strings"

"github.com/odigos-io/odigos/common"
v1 "k8s.io/api/core/v1"
)

var (
// ErrDeviceNotDetected is returned by LanguageSdkFromPodContainer when the
// requested container is present in the pod spec but GetLanguageAndOtelSdk
// reports that nothing was found for it.
ErrDeviceNotDetected = errors.New("device not detected")
// ErrContainerNotInPodSpec is returned by LanguageSdkFromPodContainer when
// no entry in pod.Spec.Containers has the requested name.
ErrContainerNotInPodSpec = errors.New("container not found in pod spec")
)

// LanguageSdkFromPodContainer looks up the container named containerName in
// pod.Spec.Containers and returns the programming language and OTel SDK that
// GetLanguageAndOtelSdk reports for it.
//
// Only the first container with a matching name is examined, and only the
// pod's regular containers (pod.Spec.Containers) are searched.
//
// It returns ErrDeviceNotDetected when the container exists but
// GetLanguageAndOtelSdk finds nothing for it, and ErrContainerNotInPodSpec
// when no container with that name is present in the pod spec.
func LanguageSdkFromPodContainer(pod *v1.Pod, containerName string) (common.ProgrammingLanguage, common.OtelSdk, error) {
	containers := pod.Spec.Containers
	for i := range containers {
		if containers[i].Name != containerName {
			continue
		}

		// The name matched: the outcome is decided by this container alone.
		if lang, otelSdk, ok := GetLanguageAndOtelSdk(containers[i]); ok {
			return lang, otelSdk, nil
		}
		return common.UnknownProgrammingLanguage, common.OtelSdk{}, ErrDeviceNotDetected
	}

	return common.UnknownProgrammingLanguage, common.OtelSdk{}, ErrContainerNotInPodSpec
}

func GetLanguageAndOtelSdk(container v1.Container) (common.ProgrammingLanguage, common.OtelSdk, bool) {
deviceName := podContainerDeviceName(container)
if deviceName == nil {
Expand Down
41 changes: 40 additions & 1 deletion k8sutils/pkg/workload/ownerreference.go
Original file line number Diff line number Diff line change
@@ -1,16 +1,55 @@
package workload

import (
"context"
"errors"
"fmt"
"strings"

corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// PodWorkloadObjectOrError behaves like PodWorkloadObject, except that a
// missing workload is reported as an error instead of a nil result.
//
// Any error from PodWorkloadObject is returned unchanged. When
// PodWorkloadObject yields a nil workload without an error, the returned
// error names the pod by namespace and name.
func PodWorkloadObjectOrError(ctx context.Context, pod *corev1.Pod) (*PodWorkload, error) {
	workload, err := PodWorkloadObject(ctx, pod)
	switch {
	case err != nil:
		return nil, err
	case workload == nil:
		return nil, fmt.Errorf("workload not found for pod %s/%s", pod.Namespace, pod.Name)
	default:
		return workload, nil
	}
}

// PodWorkloadObject returns the workload that manages the provided pod, based
// on the pod's owner references.
//
// Owner references are inspected in order. The first one that
// GetWorkloadFromOwnerReference resolves successfully determines the result,
// and the workload's namespace is taken from the pod. Owner references whose
// kind is not supported (ErrKindNotSupported) are skipped. Any other error
// aborts the search and is returned.
//
// A pod is not required to be managed by a controller: when no owner
// reference resolves to a workload, the function returns a nil workload and a
// nil error.
func PodWorkloadObject(ctx context.Context, pod *corev1.Pod) (*PodWorkload, error) {
	for i := range pod.OwnerReferences {
		name, kind, err := GetWorkloadFromOwnerReference(pod.OwnerReferences[i])
		if err == nil {
			return &PodWorkload{
				Name:      name,
				Kind:      kind,
				Namespace: pod.Namespace,
			}, nil
		}

		if !errors.Is(err, ErrKindNotSupported) {
			return nil, IgnoreErrorKindNotSupported(err)
		}
		// Unsupported owner kind: fall through and try the next owner reference.
	}

	return nil, nil
}

// GetWorkloadFromOwnerReference derives the workload name and workload kind
// from the given owner reference by passing the reference's Name and Kind to
// GetWorkloadNameAndKind. The results, including any error, are returned
// as-is.
func GetWorkloadFromOwnerReference(ownerReference metav1.OwnerReference) (workloadName string, workloadKind WorkloadKind, err error) {
	workloadName, workloadKind, err = GetWorkloadNameAndKind(ownerReference.Name, ownerReference.Kind)
	return workloadName, workloadKind, err
}

Expand Down
76 changes: 35 additions & 41 deletions odiglet/cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,25 +6,22 @@ import (
"os"
"sync"

detector "github.com/odigos-io/odigos/odiglet/pkg/detector"
"github.com/odigos-io/odigos/odiglet/pkg/ebpf"
"github.com/odigos-io/odigos/odiglet/pkg/ebpf/sdks"
"github.com/odigos-io/odigos/odiglet/pkg/instrumentation/fs"

"github.com/kubevirt/device-plugin-manager/pkg/dpm"
"github.com/odigos-io/odigos/common"
k8senv "github.com/odigos-io/odigos/k8sutils/pkg/env"
"github.com/odigos-io/odigos/odiglet/pkg/ebpf"
"github.com/odigos-io/odigos/odiglet/pkg/env"
"github.com/odigos-io/odigos/odiglet/pkg/instrumentation"
"github.com/odigos-io/odigos/odiglet/pkg/instrumentation/instrumentlang"
"github.com/odigos-io/odigos/odiglet/pkg/kube"
"github.com/odigos-io/odigos/odiglet/pkg/log"
"github.com/odigos-io/odigos/opampserver/pkg/server"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/manager/signals"

_ "net/http/pprof"
Expand All @@ -45,8 +42,8 @@ func odigletInitPhase() {
type odiglet struct {
clientset *kubernetes.Clientset
mgr ctrl.Manager
ctx context.Context
ebpfDirectors ebpf.DirectorsMap
ebpfManager *ebpf.Manager
configUpdates chan<- ebpf.ConfigUpdate
}

func newOdiglet() (*odiglet, error) {
Expand All @@ -66,34 +63,42 @@ func newOdiglet() (*odiglet, error) {
return nil, fmt.Errorf("Failed to create controller-runtime manager %w", err)
}

ctx := signals.SetupSignalHandler()

ebpfDirectors, err := initEbpf(ctx, mgr.GetClient(), mgr.GetScheme())
ebpfManager, err := ebpf.NewManager(
mgr.GetClient(),
log.Logger,
map[ebpf.OtelDistribution]ebpf.Factory{
ebpf.OtelDistribution{
Language: common.GoProgrammingLanguage,
OtelSdk: common.OtelSdkEbpfCommunity,
}: sdks.NewGoInstrumentationFactory(),
},
)
if err != nil {
return nil, fmt.Errorf("Failed to init eBPF director %w", err)
return nil, fmt.Errorf("Failed to create ebpf manager %w", err)
}

err = kube.SetupWithManager(mgr, ebpfDirectors, clientset)
configUpdates := ebpfManager.ConfigUpdates()
err = kube.SetupWithManager(mgr, nil, clientset, configUpdates)
if err != nil {
return nil, fmt.Errorf("Failed to setup controller-runtime manager %w", err)
}

return &odiglet{
clientset: clientset,
mgr: mgr,
ctx: ctx,
ebpfDirectors: ebpfDirectors,
ebpfManager: ebpfManager,
configUpdates: configUpdates,
}, nil
}

func (o *odiglet) run() {
func (o *odiglet) run(ctx context.Context) {
var wg sync.WaitGroup

// Start pprof server
wg.Add(1)
go func() {
defer wg.Done()
err := common.StartPprofServer(o.ctx, log.Logger)
err := common.StartPprofServer(ctx, log.Logger)
if err != nil {
log.Logger.Error(err, "Failed to start pprof server")
} else {
Expand All @@ -111,24 +116,22 @@ func (o *odiglet) run() {
log.Logger.V(0).Info("Device manager exited")
}()

procEvents := make(chan detector.ProcessEvent)
wg.Add(1)
go func() {
defer wg.Done()
err := detector.StartRuntimeDetector(o.ctx, log.Logger, procEvents)
err := o.ebpfManager.Run(ctx)
if err != nil {
log.Logger.Error(err, "Failed to start runtime detector")
os.Exit(-1)
log.Logger.Error(err, "Failed to run ebpf manager")
}
log.Logger.V(0).Info("Runtime detector exited")
log.Logger.V(0).Info("Ebpf manager exited")
}()

// start OpAmp server
odigosNs := k8senv.GetCurrentNamespace()
wg.Add(1)
go func() {
defer wg.Done()
err := server.StartOpAmpServer(o.ctx, log.Logger, o.mgr, o.clientset, env.Current.NodeName, odigosNs)
err := server.StartOpAmpServer(ctx, log.Logger, o.mgr, o.clientset, env.Current.NodeName, odigosNs)
if err != nil {
log.Logger.Error(err, "Failed to start opamp server")
}
Expand All @@ -139,17 +142,19 @@ func (o *odiglet) run() {
wg.Add(1)
go func() {
defer wg.Done()
err := o.mgr.Start(o.ctx)
err := o.mgr.Start(ctx)
if err != nil {
log.Logger.Error(err, "error starting kube manager")
} else {
log.Logger.V(0).Info("Kube manager exited")
}
// the manager is stopped, it is now safe to close the config updates channel
if o.configUpdates != nil {
close(o.configUpdates)
}
log.Logger.V(0).Info("Kube manager exited")
}()

<-o.ctx.Done()
for _, director := range o.ebpfDirectors {
director.Shutdown()
}
<-ctx.Done()
wg.Wait()
}

Expand All @@ -176,7 +181,9 @@ func main() {
log.Logger.Error(err, "Failed to initialize odiglet")
os.Exit(1)
}
o.run()

ctx := signals.SetupSignalHandler()
o.run(ctx)

log.Logger.V(0).Info("odiglet exiting")
}
Expand Down Expand Up @@ -216,16 +223,3 @@ func runDeviceManager(clientset *kubernetes.Clientset) {
manager := dpm.NewManager(lister)
manager.Run()
}

// initEbpf builds the map of eBPF directors used by odiglet.
//
// The map holds a single entry: a director for Go, created with a Go
// instrumentation factory and keyed by the Go programming language together
// with the community eBPF OTel SDK. The returned error is always nil.
func initEbpf(ctx context.Context, client client.Client, scheme *runtime.Scheme) (ebpf.DirectorsMap, error) {
	factory := sdks.NewGoInstrumentationFactory(client)

	directors := ebpf.DirectorsMap{
		ebpf.DirectorKey{
			Language: common.GoProgrammingLanguage,
			OtelSdk:  common.OtelSdkEbpfCommunity,
		}: ebpf.NewEbpfDirector(ctx, client, scheme, common.GoProgrammingLanguage, factory),
	}

	return directors, nil
}
67 changes: 34 additions & 33 deletions odiglet/go.mod
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module github.com/odigos-io/odigos/odiglet

go 1.22.0
go 1.22.7

require (
github.com/go-logr/logr v1.4.2
Expand All @@ -14,13 +14,14 @@ require (
github.com/odigos-io/odigos/opampserver v0.0.0
github.com/odigos-io/odigos/procdiscovery v0.0.0
github.com/odigos-io/opentelemetry-zap-bridge v0.0.5
github.com/odigos-io/runtime-detector v0.0.2
github.com/odigos-io/runtime-detector v0.0.3
github.com/stretchr/testify v1.10.0
go.opentelemetry.io/auto v0.17.0-alpha
go.opentelemetry.io/otel v1.31.0
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.31.0
go.opentelemetry.io/auto v0.18.0-alpha
go.opentelemetry.io/otel v1.32.0
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.32.0
go.uber.org/zap v1.27.0
google.golang.org/grpc v1.67.1
golang.org/x/sync v0.9.0
google.golang.org/grpc v1.68.0
k8s.io/api v0.31.0
k8s.io/apimachinery v0.31.0
k8s.io/client-go v0.31.0
Expand Down Expand Up @@ -52,7 +53,7 @@ require (
github.com/google/gnostic-models v0.6.8 // indirect
github.com/google/go-cmp v0.6.0 // indirect
github.com/google/gofuzz v1.2.0 // indirect
github.com/grpc-ecosystem/grpc-gateway/v2 v2.22.0 // indirect
github.com/grpc-ecosystem/grpc-gateway/v2 v2.23.0 // indirect
github.com/hashicorp/go-version v1.7.0 // indirect
github.com/imdario/mergo v0.3.6 // indirect
github.com/josharian/intern v1.0.0 // indirect
Expand All @@ -66,45 +67,45 @@ require (
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
github.com/prometheus/client_golang v1.20.4 // indirect
github.com/prometheus/client_golang v1.20.5 // indirect
github.com/prometheus/client_model v0.6.1 // indirect
github.com/prometheus/common v0.60.0 // indirect
github.com/prometheus/common v0.60.1 // indirect
github.com/prometheus/procfs v0.15.1 // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/x448/float16 v0.8.4 // indirect
go.opentelemetry.io/collector/pdata v1.18.0 // indirect
go.opentelemetry.io/contrib/bridges/prometheus v0.56.0 // indirect
go.opentelemetry.io/contrib/exporters/autoexport v0.56.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc v0.7.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp v0.7.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.31.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.31.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.31.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.31.0 // indirect
go.opentelemetry.io/otel/exporters/prometheus v0.53.0 // indirect
go.opentelemetry.io/otel/exporters/stdout/stdoutlog v0.7.0 // indirect
go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.31.0 // indirect
go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.31.0 // indirect
go.opentelemetry.io/otel/log v0.7.0 // indirect
go.opentelemetry.io/otel/metric v1.31.0 // indirect
go.opentelemetry.io/otel/sdk v1.31.0 // indirect
go.opentelemetry.io/otel/sdk/log v0.7.0 // indirect
go.opentelemetry.io/otel/sdk/metric v1.31.0 // indirect
go.opentelemetry.io/otel/trace v1.31.0 // indirect
go.opentelemetry.io/collector/pdata v1.19.0 // indirect
go.opentelemetry.io/contrib/bridges/prometheus v0.57.0 // indirect
go.opentelemetry.io/contrib/exporters/autoexport v0.57.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc v0.8.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp v0.8.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.32.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.32.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.32.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.32.0 // indirect
go.opentelemetry.io/otel/exporters/prometheus v0.54.0 // indirect
go.opentelemetry.io/otel/exporters/stdout/stdoutlog v0.8.0 // indirect
go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.32.0 // indirect
go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.32.0 // indirect
go.opentelemetry.io/otel/log v0.8.0 // indirect
go.opentelemetry.io/otel/metric v1.32.0 // indirect
go.opentelemetry.io/otel/sdk v1.32.0 // indirect
go.opentelemetry.io/otel/sdk/log v0.8.0 // indirect
go.opentelemetry.io/otel/sdk/metric v1.32.0 // indirect
go.opentelemetry.io/otel/trace v1.32.0 // indirect
go.opentelemetry.io/proto/otlp v1.3.1 // indirect
go.uber.org/multierr v1.11.0 // indirect
golang.org/x/arch v0.11.0 // indirect
golang.org/x/arch v0.12.0 // indirect
golang.org/x/exp v0.0.0-20230515195305-f3d0a9c9a5cc // indirect
golang.org/x/net v0.30.0 // indirect
golang.org/x/oauth2 v0.23.0 // indirect
golang.org/x/sys v0.26.0 // indirect
golang.org/x/sys v0.27.0 // indirect
golang.org/x/term v0.25.0 // indirect
golang.org/x/text v0.19.0 // indirect
golang.org/x/text v0.20.0 // indirect
golang.org/x/time v0.3.0 // indirect
golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect
gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20241007155032-5fefd90f89a9 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20241007155032-5fefd90f89a9 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20241104194629-dd2ea8efbc28 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20241104194629-dd2ea8efbc28 // indirect
google.golang.org/protobuf v1.35.1 // indirect
gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
Expand Down
Loading
Loading