diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 1a18ff5b9..ead66d4b5 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -28,7 +28,7 @@ jobs: uses: ./.github/workflows/env - name: Initialize CodeQL - uses: github/codeql-action/init@96f518a34f7a870018057716cc4d7a5c014bd61c # v3.29.10 + uses: github/codeql-action/init@3c3833e0f8c1c83d449a7478aa59c036a9165498 # v3.29.11 with: languages: go @@ -37,7 +37,7 @@ jobs: make TARGET_ARCH=${{ matrix.target_arch }} - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@96f518a34f7a870018057716cc4d7a5c014bd61c # v3.29.10 + uses: github/codeql-action/analyze@3c3833e0f8c1c83d449a7478aa59c036a9165498 # v3.29.11 with: category: "/language:Go" timeout-minutes: 10 diff --git a/.github/workflows/ossf-scorecard.yml b/.github/workflows/ossf-scorecard.yml index 9f803e011..7b95bbfeb 100644 --- a/.github/workflows/ossf-scorecard.yml +++ b/.github/workflows/ossf-scorecard.yml @@ -42,6 +42,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard (optional). 
# Commenting out will disable upload of results to your repo's Code Scanning dashboard - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@96f518a34f7a870018057716cc4d7a5c014bd61c # v3.29.10 + uses: github/codeql-action/upload-sarif@3c3833e0f8c1c83d449a7478aa59c036a9165498 # v3.29.11 with: sarif_file: results.sarif diff --git a/.github/workflows/push-docker-image.yml b/.github/workflows/push-docker-image.yml index 85e66dd84..126ae002f 100644 --- a/.github/workflows/push-docker-image.yml +++ b/.github/workflows/push-docker-image.yml @@ -1,6 +1,7 @@ name: "Update builder docker image" on: + workflow_dispatch: # Allows manual triggering of the workflow push: branches: ["main"] paths: diff --git a/.github/workflows/unit-test-on-pull-request.yml b/.github/workflows/unit-test-on-pull-request.yml index 9500a6094..edc0a1163 100644 --- a/.github/workflows/unit-test-on-pull-request.yml +++ b/.github/workflows/unit-test-on-pull-request.yml @@ -90,7 +90,7 @@ jobs: check-binary-blobs: name: Check for differences in the eBPF binary blobs runs-on: ubuntu-24.04 - container: otel/opentelemetry-ebpf-profiler-dev:latest@sha256:db6081344e85ef95317b19dbf667d56df35812353b23d0fd54e1db0f55436b80 + container: otel/opentelemetry-ebpf-profiler-dev:latest@sha256:6ab9b5ff6c2a457be97a389887caf9f3cd5344f760fdab0101b9965236bbb2db defaults: run: shell: bash --login {0} diff --git a/Cargo.lock b/Cargo.lock index 719086a59..dde85ef4f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -197,9 +197,9 @@ dependencies = [ [[package]] name = "gimli" -version = "0.32.0" +version = "0.32.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93563d740bc9ef04104f9ed6f86f1e3275c2cdafb95664e26584b9ca807a8ffe" +checksum = "cc6298e594375a7fead9efd5568f0a46e6a154fb6a9bdcbe3c06946ffd81a5f6" dependencies = [ "fallible-iterator", "stable_deref_trait", @@ -292,9 +292,9 @@ checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" [[package]] name = 
"memmap2" -version = "0.9.7" +version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "483758ad303d734cec05e5c12b41d7e93e6a6390c5e9dae6bdeb7c1259012d28" +checksum = "843a98750cd611cc2965a8213b53b43e715f13c37a9e096c6408e69990961db7" dependencies = [ "libc", ] diff --git a/cli_flags.go b/cli_flags.go index bb2282ad4..db958701e 100644 --- a/cli_flags.go +++ b/cli_flags.go @@ -69,6 +69,10 @@ var ( defaultOffCPUThreshold) envVarsHelp = "Comma separated list of environment variables that will be reported with the" + "captured profiling samples." + probeLinkHelper = "Attach a probe to a symbol of an executable. " + + "Expected format: /path/to/executable:symbol" + loadProbeHelper = "Load generic eBPF program that can be attached externally to " + + "various user or kernel space hooks." ) // Package-scope variable, so that conditionally compiled other components can refer @@ -127,6 +131,13 @@ func parseArgs() (*controller.Config, error) { fs.StringVar(&args.IncludeEnvVars, "env-vars", defaultEnvVarsValue, envVarsHelp) + fs.Func("uprobe-link", probeLinkHelper, func(link string) error { + args.UProbeLinks = append(args.UProbeLinks, link) + return nil + }) + + fs.BoolVar(&args.LoadProbe, "load-probe", false, loadProbeHelper) + fs.Usage = func() { fs.PrintDefaults() } diff --git a/internal/controller/config.go b/internal/controller/config.go index 66111b781..38fe6e022 100644 --- a/internal/controller/config.go +++ b/internal/controller/config.go @@ -33,6 +33,8 @@ type Config struct { VerboseMode bool Version bool OffCPUThreshold float64 + UProbeLinks []string + LoadProbe bool Reporter reporter.Reporter diff --git a/internal/controller/controller.go b/internal/controller/controller.go index 95bde52bf..b827696e0 100644 --- a/internal/controller/controller.go +++ b/internal/controller/controller.go @@ -98,6 +98,8 @@ func (c *Controller) Start(ctx context.Context) error { ProbabilisticThreshold: c.config.ProbabilisticThreshold, 
OffCPUThreshold: uint32(c.config.OffCPUThreshold * float64(math.MaxUint32)), IncludeEnvVars: envVars, + UProbeLinks: c.config.UProbeLinks, + LoadProbe: c.config.LoadProbe, }) if err != nil { return fmt.Errorf("failed to load eBPF tracer: %w", err) @@ -125,6 +127,13 @@ func (c *Controller) Start(ctx context.Context) error { log.Printf("Enabled off-cpu profiling with p=%f", c.config.OffCPUThreshold) } + if len(c.config.UProbeLinks) > 0 { + if err := trc.AttachUProbes(c.config.UProbeLinks); err != nil { + return fmt.Errorf("failed to attach uprobes: %v", err) + } + log.Printf("Attached uprobes") + } + if c.config.ProbabilisticThreshold < tracer.ProbabilisticThresholdMax { trc.StartProbabilisticProfiling(ctx) log.Printf("Enabled probabilistic profiling") diff --git a/interpreter/apmint/apmint.go b/interpreter/apmint/apmint.go index ddfe142a8..bc0fabe3e 100644 --- a/interpreter/apmint/apmint.go +++ b/interpreter/apmint/apmint.go @@ -19,7 +19,6 @@ import ( "go.opentelemetry.io/ebpf-profiler/host" "go.opentelemetry.io/ebpf-profiler/interpreter" "go.opentelemetry.io/ebpf-profiler/libpf" - "go.opentelemetry.io/ebpf-profiler/libpf/pfelf" "go.opentelemetry.io/ebpf-profiler/remotememory" "go.opentelemetry.io/ebpf-profiler/support" ) @@ -65,7 +64,7 @@ func Loader(_ interpreter.EbpfHandler, info *interpreter.LoaderInfo) (interprete // Resolve process storage symbol. procStorageSym, err := ef.LookupSymbol(procStorageExport) if err != nil { - if errors.Is(err, pfelf.ErrSymbolNotFound) { + if errors.Is(err, libpf.ErrSymbolNotFound) { // APM<->profiling integration not supported by agent. 
return nil, nil } diff --git a/interpreter/customlabels/customlabels.go b/interpreter/customlabels/customlabels.go index 03ebb8acf..77cbbd566 100644 --- a/interpreter/customlabels/customlabels.go +++ b/interpreter/customlabels/customlabels.go @@ -8,7 +8,6 @@ import ( "go.opentelemetry.io/ebpf-profiler/interpreter" "go.opentelemetry.io/ebpf-profiler/libpf" - "go.opentelemetry.io/ebpf-profiler/libpf/pfelf" "go.opentelemetry.io/ebpf-profiler/remotememory" "go.opentelemetry.io/ebpf-profiler/support" ) @@ -44,7 +43,7 @@ func Loader(_ interpreter.EbpfHandler, info *interpreter.LoaderInfo) (interprete } abiVersionSym, err := ef.LookupSymbol(abiVersionExport) if err != nil { - if errors.Is(err, pfelf.ErrSymbolNotFound) { + if errors.Is(err, libpf.ErrSymbolNotFound) { return nil, nil } diff --git a/interpreter/golabels/golabels.go b/interpreter/golabels/golabels.go index dd1227730..274167fcb 100644 --- a/interpreter/golabels/golabels.go +++ b/interpreter/golabels/golabels.go @@ -58,7 +58,7 @@ func Loader(_ interpreter.EbpfHandler, info *interpreter.LoaderInfo) (interprete log.Debugf("file %s detected as go version %s", info.FileName(), goVersion) offsets := getOffsets(goVersion) - tlsOffset, err := extractTLSGOffset(file, info.FileName()) + tlsOffset, err := extractTLSGOffset(file) if err != nil { return nil, fmt.Errorf("failed to extract TLS offset: %w", err) } diff --git a/interpreter/golabels/tls_amd64.go b/interpreter/golabels/tls_amd64.go index 61c721c05..f4bf716f9 100644 --- a/interpreter/golabels/tls_amd64.go +++ b/interpreter/golabels/tls_amd64.go @@ -8,6 +8,7 @@ package golabels // import "go.opentelemetry.io/ebpf-profiler/interpreter/golabe import ( log "github.com/sirupsen/logrus" "go.opentelemetry.io/ebpf-profiler/libpf/pfelf" + "go.opentelemetry.io/ebpf-profiler/nativeunwind/elfunwindinfo" "golang.org/x/arch/x86/x86asm" ) @@ -15,19 +16,19 @@ import ( // storing the current g but "static" binaries it ends up as -80. 
There // may be dynamic relocating going on so just read it from a known // symbol if possible. -func extractTLSGOffset(f *pfelf.File, path string) (int32, error) { - syms, err := f.ReadSymbols() +func extractTLSGOffset(f *pfelf.File) (int32, error) { + pclntab, err := elfunwindinfo.NewGopclntab(f) if err != nil { log.Debugf("Failed to find symbols (%v) using default TLSG offset", err) return -8, nil } + defer pclntab.Close() + // Dump of assembler code for function runtime.stackcheck: // 0x0000000000470080 <+0>: mov %fs:0xfffffffffffffff8,%rax - sym, err := syms.LookupSymbol("runtime.stackcheck.abi0") + sym, err := pclntab.LookupSymbol("runtime.stackcheck") if err != nil { - // Binary must be stripped, hope default is correct and warn. - log.Warnf("Failed to find stackcheck symbol, Go labels might not work: %v (%s)", err, path) - return -8, nil + return 0, err } b, err := f.VirtualMemory(int64(sym.Address), 16, 16) if err != nil { @@ -62,6 +63,6 @@ func extractTLSGOffset(f *pfelf.File, path string) (int32, error) { } } exit: - log.Warnf("Failed to decode stackcheck symbol, Go label collection might not work %s", path) + log.Warnf("Failed to decode stackcheck symbol, Go label collection might not work") return -8, nil } diff --git a/interpreter/golabels/tls_arm64.go b/interpreter/golabels/tls_arm64.go index bb9e827fd..c21c870f5 100644 --- a/interpreter/golabels/tls_arm64.go +++ b/interpreter/golabels/tls_arm64.go @@ -21,7 +21,7 @@ import ( // 0x000000000007f270 <+16>: mov x27, #0x30 // #48 // 0x000000000007f274 <+20>: ldr x28, [x0, x27] // 0x000000000007f278 <+24>: ret -func extractTLSGOffset(f *pfelf.File, path string) (int32, error) { +func extractTLSGOffset(f *pfelf.File) (int32, error) { iscgo, err := f.IsCgoEnabled() if err != nil || !iscgo { return 0, err @@ -55,6 +55,6 @@ func extractTLSGOffset(f *pfelf.File, path string) (int32, error) { } } } - log.Warnf("Failed to decode load_g symbol, Go label collection might not work with CGO frames (%s)", path) + 
log.Warnf("Failed to decode load_g symbol, Go label collection might not work with CGO frames") return 0, nil } diff --git a/interpreter/luajit/offsets.go b/interpreter/luajit/offsets.go index c847e2d89..b4088cc19 100644 --- a/interpreter/luajit/offsets.go +++ b/interpreter/luajit/offsets.go @@ -358,7 +358,7 @@ func (o *offsetData) readSym(sym *libpf.Symbol) ([]byte, error) { func (o *offsetData) lookupSymbol(name libpf.SymbolName) (s *libpf.Symbol, err error) { s, err = o.f.LookupSymbol(name) - if err == pfelf.ErrSymbolNotFound && o.syms != nil { + if err == libpf.ErrSymbolNotFound && o.syms != nil { s, err = o.syms.LookupSymbol(name) } if s == nil && o.dsyms != nil { diff --git a/libpf/pfelf/file.go b/libpf/pfelf/file.go index 01da01166..57fba12d7 100644 --- a/libpf/pfelf/file.go +++ b/libpf/pfelf/file.go @@ -51,9 +51,6 @@ const ( // List of public errors. var ( - // ErrSymbolNotFound is returned when the requested symbol was not found. - ErrSymbolNotFound = errors.New("symbol not found") - // ErrNotELF is returned when the file is not an ELF file. 
ErrNotELF = errors.New("not an ELF file") ) @@ -1050,7 +1047,7 @@ func (f *File) LookupSymbol(symbol libpf.SymbolName) (*libpf.Symbol, error) { mask := uint(1)<<(h%ptrSizeBits) | uint(1)<<((h>>hdr.bloomShift)%ptrSizeBits) if bloom&mask != mask { - return nil, ErrSymbolNotFound + return nil, libpf.ErrSymbolNotFound } // Read the initial symbol index to start looking from @@ -1061,7 +1058,7 @@ func (f *File) LookupSymbol(symbol libpf.SymbolName) (*libpf.Symbol, error) { return nil, err } if i == 0 { - return nil, ErrSymbolNotFound + return nil, libpf.ErrSymbolNotFound } // Search the hash bucket @@ -1116,7 +1113,7 @@ func (f *File) LookupSymbol(symbol libpf.SymbolName) (*libpf.Symbol, error) { return nil, errors.New("symbol hash not present") } - return nil, ErrSymbolNotFound + return nil, libpf.ErrSymbolNotFound } // LookupSymbol searches for a given symbol in the ELF diff --git a/libpf/symbol.go b/libpf/symbol.go index dd3c490ae..fd338589e 100644 --- a/libpf/symbol.go +++ b/libpf/symbol.go @@ -4,11 +4,18 @@ package libpf // import "go.opentelemetry.io/ebpf-profiler/libpf" import ( + "errors" "fmt" "sort" "strings" ) +// List of public errors. +var ( + // ErrSymbolNotFound is returned when the requested symbol was not found. + ErrSymbolNotFound = errors.New("symbol not found") +) + // SymbolValue represents the value associated with a symbol, e.g. 
either an // offset or an absolute address type SymbolValue uint64 diff --git a/nativeunwind/elfunwindinfo/elfgopclntab.go b/nativeunwind/elfunwindinfo/elfgopclntab.go index 03cf1727d..4d82a104e 100644 --- a/nativeunwind/elfunwindinfo/elfgopclntab.go +++ b/nativeunwind/elfunwindinfo/elfgopclntab.go @@ -17,6 +17,7 @@ import ( "strings" "unsafe" + "go.opentelemetry.io/ebpf-profiler/libpf" "go.opentelemetry.io/ebpf-profiler/libpf/pfelf" sdtypes "go.opentelemetry.io/ebpf-profiler/nativeunwind/stackdeltatypes" "go.opentelemetry.io/ebpf-profiler/support" @@ -351,6 +352,30 @@ type Gopclntab struct { functab, funcdata, funcnametab, filetab, pctab, cutab []byte } +// LookupSymbol searches for a given symbol in .gopclntab. +func (g *Gopclntab) LookupSymbol(symbol libpf.SymbolName) (*libpf.Symbol, error) { + symString := string(symbol) + for i := 0; i < g.numFuncs; i++ { + _, funcOff := g.getFuncMapEntry(i) + pc, fun := g.getFunc(funcOff) + if fun == nil { + continue + } + name := getString(g.funcnametab, int(fun.nameOff)) + if name == symString { + nextPc, _ := g.getFuncMapEntry(i + 1) + size := uint64(nextPc - pc) + + return &libpf.Symbol{ + Name: symbol, + Address: libpf.SymbolValue(pc), + Size: size, + }, nil + } + } + return nil, libpf.ErrSymbolNotFound +} + // NewGopclntab parses and returns the parsed data for further operations. 
func NewGopclntab(ef *pfelf.File) (*Gopclntab, error) { data, err := extractGoPclntab(ef) diff --git a/reporter/base_reporter.go b/reporter/base_reporter.go index aa380fafc..8a49f26d2 100644 --- a/reporter/base_reporter.go +++ b/reporter/base_reporter.go @@ -4,12 +4,9 @@ package reporter // import "go.opentelemetry.io/ebpf-profiler/reporter" import ( - "context" "errors" "fmt" - "time" - lru "github.com/elastic/go-freelru" "go.opentelemetry.io/ebpf-profiler/libpf" "go.opentelemetry.io/ebpf-profiler/libpf/xsync" "go.opentelemetry.io/ebpf-profiler/reporter/internal/pdata" @@ -35,9 +32,6 @@ type baseReporter struct { // traceEvents stores reported trace events (trace metadata with frames and counts) traceEvents xsync.RWMutex[samples.TraceEventsTree] - - // hostmetadata stores metadata that is sent out with every request. - hostmetadata *lru.SyncedLRU[string, string] } var errUnknownOrigin = errors.New("unknown trace origin") @@ -46,23 +40,6 @@ func (b *baseReporter) Stop() { b.runLoop.Stop() } -func (b *baseReporter) ReportHostMetadata(metadataMap map[string]string) { - b.addHostmetadata(metadataMap) -} - -func (b *baseReporter) ReportHostMetadataBlocking(_ context.Context, - metadataMap map[string]string, _ int, _ time.Duration) error { - b.addHostmetadata(metadataMap) - return nil -} - -// addHostmetadata adds to and overwrites host metadata. 
-func (b *baseReporter) addHostmetadata(metadataMap map[string]string) { - for k, v := range metadataMap { - b.hostmetadata.Add(k, v) - } -} - func (b *baseReporter) ExecutableKnown(fileID libpf.FileID) bool { _, known := b.pdata.Executables.GetAndRefresh(fileID, pdata.ExecutableCacheLifetime) return known @@ -76,8 +53,11 @@ func (b *baseReporter) ExecutableMetadata(args *ExecutableMetadataArgs) { } func (b *baseReporter) ReportTraceEvent(trace *libpf.Trace, meta *samples.TraceEventMeta) error { - if meta.Origin != support.TraceOriginSampling && meta.Origin != support.TraceOriginOffCPU { - // At the moment only on-CPU and off-CPU traces are reported. + switch meta.Origin { + case support.TraceOriginSampling: + case support.TraceOriginOffCPU: + case support.TraceOriginUProbe: + default: return fmt.Errorf("skip reporting trace for %d origin: %w", meta.Origin, errUnknownOrigin) } @@ -124,6 +104,7 @@ func (b *baseReporter) ReportTraceEvent(trace *libpf.Trace, meta *samples.TraceE Timestamps: []uint64{uint64(meta.Timestamp)}, OffTimes: []int64{meta.OffTime}, EnvVars: meta.EnvVars, + Labels: trace.CustomLabels, } return nil } diff --git a/reporter/collector_reporter.go b/reporter/collector_reporter.go index 050fd354a..b6d604bf8 100644 --- a/reporter/collector_reporter.go +++ b/reporter/collector_reporter.go @@ -6,7 +6,6 @@ package reporter // import "go.opentelemetry.io/ebpf-profiler/reporter" import ( "context" - lru "github.com/elastic/go-freelru" log "github.com/sirupsen/logrus" "go.opentelemetry.io/collector/consumer/xconsumer" @@ -28,14 +27,6 @@ type CollectorReporter struct { // NewCollector builds a new CollectorReporter func NewCollector(cfg *Config, nextConsumer xconsumer.Profiles) (*CollectorReporter, error) { - // Next step: Dynamically configure the size of this LRU. - // Currently, we use the length of the JSON array in - // hostmetadata/hostmetadata.json. 
- hostmetadata, err := lru.NewSynced[string, string](115, hashString) - if err != nil { - return nil, err - } - data, err := pdata.New( cfg.SamplesPerSecond, cfg.ExecutablesCacheElements, @@ -49,12 +40,11 @@ func NewCollector(cfg *Config, nextConsumer xconsumer.Profiles) (*CollectorRepor return &CollectorReporter{ baseReporter: &baseReporter{ - cfg: cfg, - name: cfg.Name, - version: cfg.Version, - pdata: data, - traceEvents: xsync.NewRWMutex(tree), - hostmetadata: hostmetadata, + cfg: cfg, + name: cfg.Name, + version: cfg.Version, + pdata: data, + traceEvents: xsync.NewRWMutex(tree), runLoop: &runLoop{ stopSignal: make(chan libpf.Void), }, diff --git a/reporter/iface.go b/reporter/iface.go index cf56a5b80..fed157466 100644 --- a/reporter/iface.go +++ b/reporter/iface.go @@ -5,7 +5,6 @@ package reporter // import "go.opentelemetry.io/ebpf-profiler/reporter" import ( "context" - "time" "go.opentelemetry.io/ebpf-profiler/libpf" "go.opentelemetry.io/ebpf-profiler/process" @@ -16,7 +15,6 @@ import ( type Reporter interface { TraceReporter SymbolReporter - HostMetadataReporter // Start starts the reporter in the background. // @@ -77,12 +75,3 @@ type SymbolReporter interface { // open the file and then enqueue the upload in the background. ExecutableMetadata(args *ExecutableMetadataArgs) } - -type HostMetadataReporter interface { - // ReportHostMetadata enqueues host metadata for sending (to the collection agent). - ReportHostMetadata(metadataMap map[string]string) - - // ReportHostMetadataBlocking sends host metadata to the collection agent. 
- ReportHostMetadataBlocking(ctx context.Context, metadataMap map[string]string, - maxRetries int, waitRetry time.Duration) error -} diff --git a/reporter/internal/pdata/generate.go b/reporter/internal/pdata/generate.go index 7d583c70d..c049a2eb0 100644 --- a/reporter/internal/pdata/generate.go +++ b/reporter/internal/pdata/generate.go @@ -64,6 +64,7 @@ func (p *Pdata) Generate(tree samples.TraceEventsTree, for _, origin := range []libpf.Origin{ support.TraceOriginSampling, support.TraceOriginOffCPU, + support.TraceOriginUProbe, } { if len(originToEvents[origin]) == 0 { // Do not append empty profiles. @@ -125,6 +126,9 @@ func (p *Pdata) setProfile( case support.TraceOriginOffCPU: st.SetTypeStrindex(stringSet.Add("events")) st.SetUnitStrindex(stringSet.Add("nanoseconds")) + case support.TraceOriginUProbe: + st.SetTypeStrindex(stringSet.Add("events")) + st.SetUnitStrindex(stringSet.Add("count")) default: // Should never happen return fmt.Errorf("generating profile for unsupported origin %d", origin) @@ -149,6 +153,8 @@ func (p *Pdata) setProfile( sample.Value().Append(1) case support.TraceOriginOffCPU: sample.Value().Append(traceInfo.OffTimes...) + case support.TraceOriginUProbe: + sample.Value().Append(1) } // Walk every frame of the trace. @@ -251,6 +257,14 @@ func (p *Pdata) setProfile( attribute.Key("process.environment_variable."+key), value) } + for key, value := range traceInfo.Labels { + // Once https://github.com/open-telemetry/semantic-conventions/issues/2561 + // reached an agreement, use the actual OTel SemConv attribute. 
+ attrMgr.AppendOptionalString( + sample.AttributeIndices(), + attribute.Key("process.context.label."+key), + value) + } if p.ExtraSampleAttrProd != nil { extra := p.ExtraSampleAttrProd.ExtraSampleAttrs(attrMgr, traceKey.ExtraMeta) diff --git a/reporter/otlp_reporter.go b/reporter/otlp_reporter.go index 4b198f222..3360e2ef0 100644 --- a/reporter/otlp_reporter.go +++ b/reporter/otlp_reporter.go @@ -8,7 +8,6 @@ import ( "crypto/tls" "time" - lru "github.com/elastic/go-freelru" log "github.com/sirupsen/logrus" "go.opentelemetry.io/collector/pdata/pprofile/pprofileotlp" "google.golang.org/grpc" @@ -44,14 +43,6 @@ type OTLPReporter struct { // NewOTLP returns a new instance of OTLPReporter func NewOTLP(cfg *Config) (*OTLPReporter, error) { - // Next step: Dynamically configure the size of this LRU. - // Currently, we use the length of the JSON array in - // hostmetadata/hostmetadata.json. - hostmetadata, err := lru.NewSynced[string, string](115, hashString) - if err != nil { - return nil, err - } - data, err := pdata.New( cfg.SamplesPerSecond, cfg.ExecutablesCacheElements, @@ -65,12 +56,11 @@ func NewOTLP(cfg *Config) (*OTLPReporter, error) { return &OTLPReporter{ baseReporter: &baseReporter{ - cfg: cfg, - name: cfg.Name, - version: cfg.Version, - pdata: data, - traceEvents: xsync.NewRWMutex(eventsTree), - hostmetadata: hostmetadata, + cfg: cfg, + name: cfg.Name, + version: cfg.Version, + pdata: data, + traceEvents: xsync.NewRWMutex(eventsTree), runLoop: &runLoop{ stopSignal: make(chan libpf.Void), }, diff --git a/reporter/samples/samples.go b/reporter/samples/samples.go index 988269339..4b9f1f144 100644 --- a/reporter/samples/samples.go +++ b/reporter/samples/samples.go @@ -29,6 +29,7 @@ type TraceEvents struct { Timestamps []uint64 // in nanoseconds OffTimes []int64 // in nanoseconds EnvVars map[string]string + Labels map[string]string } // TraceAndMetaKey is the deduplication key for samples. 
This **must always** diff --git a/reporter/util.go b/reporter/util.go deleted file mode 100644 index 6eba1fa6e..000000000 --- a/reporter/util.go +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -package reporter // import "go.opentelemetry.io/ebpf-profiler/reporter" - -import "github.com/zeebo/xxh3" - -// hashString is a helper function for LRUs that use string as a key. -// Xxh3 turned out to be the fastest hash function for strings in the FreeLRU benchmarks. -// It was only outperformed by the AES hash function, which is implemented in Plan9 assembly. -func hashString(s string) uint32 { - return uint32(xxh3.HashString(s)) -} diff --git a/support/ebpf/off_cpu.ebpf.c b/support/ebpf/off_cpu.ebpf.c index 24fc2312c..ff45ebed9 100644 --- a/support/ebpf/off_cpu.ebpf.c +++ b/support/ebpf/off_cpu.ebpf.c @@ -51,10 +51,10 @@ int tracepoint__sched_switch(UNUSED void *ctx) return 0; } -// dummy is never loaded or called. It just makes sure kprobe_progs is +// kprobe__dummy is never loaded or called. It just makes sure kprobe_progs is // referenced and make the compiler and linker happy. 
SEC("kprobe/dummy") -int dummy(struct pt_regs *ctx) +int kprobe__dummy(struct pt_regs *ctx) { bpf_tail_call(ctx, &kprobe_progs, 0); return 0; diff --git a/support/ebpf/tracer.ebpf.amd64 b/support/ebpf/tracer.ebpf.amd64 index 282c1802e..be5e4c8bc 100644 Binary files a/support/ebpf/tracer.ebpf.amd64 and b/support/ebpf/tracer.ebpf.amd64 differ diff --git a/support/ebpf/tracer.ebpf.arm64 b/support/ebpf/tracer.ebpf.arm64 index 84197ee83..1f12ac249 100644 Binary files a/support/ebpf/tracer.ebpf.arm64 and b/support/ebpf/tracer.ebpf.arm64 differ diff --git a/support/ebpf/types.h b/support/ebpf/types.h index ace2947fc..428db3c40 100644 --- a/support/ebpf/types.h +++ b/support/ebpf/types.h @@ -398,6 +398,7 @@ typedef enum TraceOrigin { TRACE_UNKNOWN, TRACE_SAMPLING, TRACE_OFF_CPU, + TRACE_UPROBE, TRACE_MEMORY, } TraceOrigin; diff --git a/support/ebpf/uprobe.ebpf.c b/support/ebpf/uprobe.ebpf.c new file mode 100644 index 000000000..2414f7a8e --- /dev/null +++ b/support/ebpf/uprobe.ebpf.c @@ -0,0 +1,20 @@ +#include "bpfdefs.h" +#include "tracemgmt.h" +#include "types.h" + +// uprobe__generic serves as entry point for uprobe based profiling. 
+SEC("uprobe/generic") +int uprobe__generic(void *ctx) +{ + u64 pid_tgid = bpf_get_current_pid_tgid(); + u32 pid = pid_tgid >> 32; + u32 tid = pid_tgid & 0xFFFFFFFF; + + if (pid == 0 || tid == 0) { + return 0; + } + + u64 ts = bpf_ktime_get_ns(); + + return collect_trace(ctx, TRACE_UPROBE, pid, tid, ts, 0); +} diff --git a/support/types.go b/support/types.go index f611422c5..3e46c1985 100644 --- a/support/types.go +++ b/support/types.go @@ -89,7 +89,8 @@ const ( TraceOriginUnknown = 0x0 TraceOriginSampling = 0x1 TraceOriginOffCPU = 0x2 - TraceOriginMemory = 0x3 + TraceOriginUProbe = 0x3 + TraceOriginMemory = 0x4 ) type ApmSpanID [8]byte diff --git a/support/types_def.go b/support/types_def.go index d9f1c4282..6f9eaf083 100644 --- a/support/types_def.go +++ b/support/types_def.go @@ -101,6 +101,7 @@ const ( TraceOriginUnknown = C.TRACE_UNKNOWN TraceOriginSampling = C.TRACE_SAMPLING TraceOriginOffCPU = C.TRACE_OFF_CPU + TraceOriginUProbe = C.TRACE_UPROBE TraceOriginMemory = C.TRACE_MEMORY ) diff --git a/tools/coredump/modulestore/store.go b/tools/coredump/modulestore/store.go index 5b7d62dd7..a18b2eb5f 100644 --- a/tools/coredump/modulestore/store.go +++ b/tools/coredump/modulestore/store.go @@ -138,7 +138,7 @@ func (store *Store) OpenReadAt(id ID) (*ModuleReader, error) { file, err := zstpak.Open(localPath) if err != nil { - return nil, fmt.Errorf("failed to open local file: %w", err) + return nil, fmt.Errorf("failed to open local file %s: %w", localPath, err) } reader := &ModuleReader{ diff --git a/tracer/tracer.go b/tracer/tracer.go index 9cf37662c..b9766c062 100644 --- a/tracer/tracer.go +++ b/tracer/tracer.go @@ -145,6 +145,12 @@ type Config struct { // IncludeEnvVars holds a list of environment variables that should be captured and reported // from processes IncludeEnvVars libpf.Set[string] + // UProbeLinks holds a list of executable:symbol elements to which + // a uprobe will be attached. 
+ UProbeLinks []string + // LoadProbe indicates whether the generic eBPF program should be loaded + // without being attached to something. + LoadProbe bool } // hookPoint specifies the group and name of the hooked point in the kernel. @@ -398,13 +404,47 @@ func initializeMapsAndPrograms(kmod *kallsyms.Module, cfg *Config) ( return nil, nil, fmt.Errorf("failed to load perf eBPF programs: %v", err) } + if cfg.OffCPUThreshold > 0 || len(cfg.UProbeLinks) > 0 || cfg.LoadProbe { + // Load the tail call destinations if any kind of event profiling is enabled. + if err = loadProbeUnwinders(coll, ebpfProgs, ebpfMaps["kprobe_progs"], tailCallProgs, + cfg.BPFVerifierLogLevel, ebpfMaps["perf_progs"].FD()); err != nil { + return nil, nil, fmt.Errorf("failed to load kprobe eBPF programs: %v", err) + } + } + if cfg.OffCPUThreshold > 0 { - if err = loadKProbeUnwinders(coll, ebpfProgs, ebpfMaps["kprobe_progs"], tailCallProgs, + offCPUProgs := []progLoaderHelper{ + { + name: "finish_task_switch", + noTailCallTarget: true, + enable: true, + }, + { + name: "tracepoint__sched_switch", + noTailCallTarget: true, + enable: true, + }, + } + if err = loadProbeUnwinders(coll, ebpfProgs, ebpfMaps["kprobe_progs"], offCPUProgs, cfg.BPFVerifierLogLevel, ebpfMaps["perf_progs"].FD()); err != nil { return nil, nil, fmt.Errorf("failed to load kprobe eBPF programs: %v", err) } } + if len(cfg.UProbeLinks) > 0 || cfg.LoadProbe { + uprobeProgs := []progLoaderHelper{ + { + name: "uprobe__generic", + noTailCallTarget: true, + enable: true, + }, + } + if err = loadProbeUnwinders(coll, ebpfProgs, ebpfMaps["kprobe_progs"], uprobeProgs, + cfg.BPFVerifierLogLevel, ebpfMaps["perf_progs"].FD()); err != nil { + return nil, nil, fmt.Errorf("failed to load uprobe eBPF programs: %v", err) + } + } + if err = loadSystemConfig(coll, ebpfMaps, kmod, cfg.IncludeTracers, cfg.OffCPUThreshold, cfg.FilterErrorFrames); err != nil { return nil, nil, fmt.Errorf("failed to load system config: %v", err) @@ -578,31 +618,16 @@ 
func progArrayReferences(perfTailCallMapFD int, insns asm.Instructions) []int { return insNos } -// loadKProbeUnwinders reuses large parts of loadPerfUnwinders. By default all eBPF programs -// are written as perf event eBPF programs. loadKProbeUnwinders dynamically rewrites the -// specification of these programs to kprobe eBPF programs and adjusts tail call maps. -func loadKProbeUnwinders(coll *cebpf.CollectionSpec, ebpfProgs map[string]*cebpf.Program, - tailcallMap *cebpf.Map, tailCallProgs []progLoaderHelper, +// loadProbeUnwinders reuses large parts of loadPerfUnwinders. By default all eBPF programs +// are written as perf event eBPF programs. loadProbeUnwinders dynamically rewrites the +// specification of these programs to xProbe eBPF programs and adjusts tail call maps. +func loadProbeUnwinders(coll *cebpf.CollectionSpec, ebpfProgs map[string]*cebpf.Program, + tailcallMap *cebpf.Map, progs []progLoaderHelper, bpfVerifierLogLevel uint32, perfTailCallMapFD int) error { programOptions := cebpf.ProgramOptions{ LogLevel: cebpf.LogLevel(bpfVerifierLogLevel), } - progs := make([]progLoaderHelper, len(tailCallProgs)+2) - copy(progs, tailCallProgs) - progs = append(progs, - progLoaderHelper{ - name: "finish_task_switch", - noTailCallTarget: true, - enable: true, - }, - progLoaderHelper{ - name: "tracepoint__sched_switch", - noTailCallTarget: true, - enable: true, - }, - ) - for _, unwindProg := range progs { if !unwindProg.enable { continue @@ -893,7 +918,11 @@ func (t *Tracer) loadBpfTrace(raw []byte, cpu int) *host.Trace { EnvVars: procMeta.EnvVariables, } - if trace.Origin != support.TraceOriginSampling && trace.Origin != support.TraceOriginOffCPU { + switch trace.Origin { + case support.TraceOriginSampling: + case support.TraceOriginOffCPU: + case support.TraceOriginUProbe: + default: log.Warnf("Skip handling trace from unexpected %d origin", trace.Origin) return nil } @@ -1144,6 +1173,27 @@ func (t *Tracer) StartOffCPUProfiling() error { return nil } +func (t 
*Tracer) AttachUProbes(uprobes []string) error { + uProbeProg, ok := t.ebpfProgs["uprobe__generic"] + if !ok { + return errors.New("uprobe__generic is not available") + } + for _, uprobeStr := range uprobes { + split := strings.SplitN(uprobeStr, ":", 2) + + exec, err := link.OpenExecutable(split[0]) + if err != nil { + return err + } + uprobeLink, err := exec.Uprobe(split[1], uProbeProg, nil) + if err != nil { + return err + } + t.hooks[hookPoint{group: "uprobe", name: uprobeStr}] = uprobeLink + } + return nil +} + // TraceProcessor gets the trace processor. func (t *Tracer) TraceProcessor() tracehandler.TraceProcessor { return t.processManager