diff --git a/.github/workflows/unit-test-on-pull-request.yml b/.github/workflows/unit-test-on-pull-request.yml index f23859eb9..8ee0cd777 100644 --- a/.github/workflows/unit-test-on-pull-request.yml +++ b/.github/workflows/unit-test-on-pull-request.yml @@ -251,7 +251,9 @@ jobs: - name: Get parcagpu image digest id: parcagpu-digest run: | - digest=$(docker buildx imagetools inspect ghcr.io/parca-dev/parcagpu:latest --format '{{.Digest}}' 2>/dev/null || echo "unknown") + # pipefail is required: without it a failed inspect still produces sha256("") and the "unknown" fallback below never fires. + digest=$(set -o pipefail; docker buildx imagetools inspect ghcr.io/parca-dev/parcagpu:latest --raw 2>/dev/null | sha256sum | awk '{print $1}') || digest= + digest=${digest:-unknown} echo "digest=${digest}" >> "$GITHUB_OUTPUT" - name: Cache parcagpu library uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4 diff --git a/LICENSES/github.com/parca-dev/usdt/LICENSE b/LICENSES/github.com/parca-dev/usdt/LICENSE new file mode 100644 index 000000000..6392310dd --- /dev/null +++ b/LICENSES/github.com/parca-dev/usdt/LICENSE @@ -0,0 +1,199 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License.
+ + "Source" shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (which shall not include combinations of the Work with other works). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based upon (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and derivative works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control + systems, and issue tracking systems that are managed by, or on behalf + of, the Licensor for the purpose of discussing and improving the Work, + but excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to use, reproduce, modify, distribute, prepare + Derivative Works of, and publicly display the Work and such Derivative + Works in all media and formats whether now known or hereafter devised. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright notice to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Support. You are not required to accept + warranty or support for the Work from any Contributor. However, + you may choose to offer and charge a fee for warranty, support, + indemnity or other liability obligations consistent with this + License. When accepting any such obligations on your own behalf + or on behalf of another Contributor, you must obtain the + Contributor's express prior written consent and acknowledge + that the Contributor may be liable to You for any damages + incurred by You as a result of accepting such warranty or support. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. Don't include + the brackets! The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "license" line as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/go.mod b/go.mod index ae964b67c..ee4b6022a 100644 --- a/go.mod +++ b/go.mod @@ -25,6 +25,7 @@ require ( github.com/minio/sha256-simd v1.0.1 github.com/open-telemetry/sig-profiling/tools/profcheck v0.0.0-20260303084341-52f633d434c9 github.com/parca-dev/oomprof v0.1.6 + github.com/parca-dev/usdt v0.0.2 github.com/peterbourgon/ff/v3 v3.4.0 github.com/sirupsen/logrus v1.9.4 github.com/stretchr/testify v1.11.1 diff --git a/go.sum b/go.sum index 96df409a3..5b0bd436c 100644 --- a/go.sum +++ b/go.sum @@ -196,6 +196,8 @@ github.com/opencontainers/runtime-spec v1.2.0 h1:z97+pHb3uELt/yiAWD691HNHQIF07bE github.com/opencontainers/runtime-spec v1.2.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= github.com/parca-dev/oomprof v0.1.6 h1:potfd09aphNKqsIF54ZsiddTvksVMjQiaKnczFOsVGM= github.com/parca-dev/oomprof v0.1.6/go.mod h1:iqI6XrmiNWOa8m2vEIKo+GtQrqbWCMLFpBWuk8RuAPs= +github.com/parca-dev/usdt v0.0.2 h1:bpKQycQ++zV8pwkMaJSxZS07XnEXqO3rkHcLYFJDTl4= +github.com/parca-dev/usdt v0.0.2/go.mod h1:bjh3OTksk+pyP7WsHWlRKWaMSJTUr0gx0piZ/tAv6/w= github.com/peterbourgon/ff/v3 v3.4.0 h1:QBvM/rizZM1cB0p0lGMdmR7HxZeI/ZrBWB4DqLkMUBc= github.com/peterbourgon/ff/v3 v3.4.0/go.mod h1:zjJVUhx+twciwfDl0zBcFzl4dW8axCRyXE/eKY9RztQ= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= diff --git a/libpf/pfelf/usdt.go b/libpf/pfelf/usdt.go index 
33eee5bd9..db4019599 100644 --- a/libpf/pfelf/usdt.go +++ b/libpf/pfelf/usdt.go @@ -4,139 +4,57 @@ package pfelf // import "go.opentelemetry.io/ebpf-profiler/libpf/pfelf" import ( - "encoding/binary" - "strings" -) - -// USDTProbe represents a USDT probe found in ELF with file-offset-adjusted addresses -type USDTProbe struct { - Provider string - Name string - Location uint64 // File offset for uprobe attachment - Base uint64 // Original base address from note - SemaphoreOffset uint64 // File offset for semaphore - Arguments string -} + "debug/elf" -// ParseUSDTProbes reads USDT probe information from ELF .note.stapsdt section. -// It applies prelink adjustments if .stapsdt.base section exists, and converts -// virtual addresses to file offsets suitable for uprobe attachment. -func (f *File) ParseUSDTProbes() ([]USDTProbe, error) { - // LoadSections populates f.Sections from section headers. - // It is idempotent and required for the .note.stapsdt lookup below. - if err := f.LoadSections(); err != nil { - return nil, err - } + "github.com/parca-dev/usdt" +) - var probes []USDTProbe +// USDTProbe is an alias for usdt.Probe for backwards compatibility. +type USDTProbe = usdt.Probe - // Find .note.stapsdt section - var stapsdt *Section - for i := range f.Sections { - if f.Sections[i].Name == ".note.stapsdt" { - stapsdt = &f.Sections[i] - break - } - } - if stapsdt == nil { - return nil, nil // No USDT probes in this binary - } +// pfelfELFReader adapts pfelf.File to the usdt.ELFReader interface. 
+type pfelfELFReader struct { + f *File +} - data, err := stapsdt.Data(16 * 1024) - if err != nil { +func (r *pfelfELFReader) Sections() ([]usdt.ELFSection, error) { + if err := r.f.LoadSections(); err != nil { return nil, err } - - // Find .stapsdt.base section address for prelink adjustment - var baseAddr uint64 - for i := range f.Sections { - if f.Sections[i].Name == ".stapsdt.base" { - baseAddr = f.Sections[i].Addr - break - } - } - - // Parse note entries - offset := 0 - for offset < len(data) { - if offset+12 > len(data) { - break - } - - // Note header: namesz(4) + descsz(4) + type(4) - namesz := binary.LittleEndian.Uint32(data[offset : offset+4]) - descsz := binary.LittleEndian.Uint32(data[offset+4 : offset+8]) - noteType := binary.LittleEndian.Uint32(data[offset+8 : offset+12]) - offset += 12 - - if noteType != 3 { // NT_STAPSDT - // Skip this note - nameEnd := offset + int((namesz+3)&^3) // align to 4 bytes - descEnd := nameEnd + int((descsz+3)&^3) - offset = descEnd - continue - } - - // Skip owner name (should be "stapsdt") - nameEnd := offset + int((namesz+3)&^3) - - if nameEnd+int(descsz) > len(data) { - break - } - - // Parse descriptor - desc := data[nameEnd : nameEnd+int(descsz)] - if len(desc) < 24 { // 3 uint64 values - offset = nameEnd + int((descsz+3)&^3) - continue - } - - location := binary.LittleEndian.Uint64(desc[0:8]) - noteBase := binary.LittleEndian.Uint64(desc[8:16]) - semaphore := binary.LittleEndian.Uint64(desc[16:24]) - - // Apply prelink adjustment if .stapsdt.base section exists - // See: https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation - if baseAddr != 0 && noteBase != 0 { - diff := baseAddr - noteBase - location += diff - if semaphore != 0 { - semaphore += diff - } - } - - // Convert virtual address to file offset for uprobe attachment - prog := f.findVirtualAddressProg(location) - if prog != nil { - location = location - prog.Vaddr + prog.Off - } - - // Convert semaphore virtual address to file offset - var 
semaphoreFileOffset uint64 - if semaphore != 0 { - semaProg := f.findVirtualAddressProg(semaphore) - if semaProg != nil { - semaphoreFileOffset = semaphore - semaProg.Vaddr + semaProg.Off + sections := make([]usdt.ELFSection, len(r.f.Sections)) + for i, s := range r.f.Sections { + sections[i] = usdt.ELFSection{ + Name: s.Name, + Addr: s.Addr, + } + // Only read data for sections the parser needs + if s.Name == ".note.stapsdt" || s.Name == ".stapsdt.base" { + data, err := s.Data(16 * 1024) + if err != nil { + return nil, err } + sections[i].Data = data } + } + return sections, nil +} - // Parse strings: provider\0probe\0arguments\0 - stringData := desc[24:] - strings := strings.Split(string(stringData), "\x00") - if len(strings) >= 3 { - probe := USDTProbe{ - Provider: strings[0], - Name: strings[1], - Location: location, - Base: noteBase, - SemaphoreOffset: semaphoreFileOffset, - Arguments: strings[2], - } - probes = append(probes, probe) +func (r *pfelfELFReader) LoadSegments() []usdt.ELFProg { + var segs []usdt.ELFProg + for _, p := range r.f.Progs { + if elf.ProgType(p.Type) == elf.PT_LOAD { + segs = append(segs, usdt.ELFProg{ + Vaddr: p.Vaddr, + Memsz: p.Memsz, + Off: p.Off, + }) } - - offset = nameEnd + int((descsz+3)&^3) } + return segs +} - return probes, nil +// ParseUSDTProbes reads USDT probe information from ELF .note.stapsdt section. +// It delegates to the usdt package via the pfelf adapter. 
+func (f *File) ParseUSDTProbes() ([]usdt.Probe, error) { + return usdt.ParseProbes(&pfelfELFReader{f: f}) } diff --git a/libpf/pfelf/usdt_args.go b/libpf/pfelf/usdt_args.go index 78ce9cdcc..8810c77d4 100644 --- a/libpf/pfelf/usdt_args.go +++ b/libpf/pfelf/usdt_args.go @@ -12,7 +12,7 @@ import ( "strings" "unsafe" - "go.opentelemetry.io/ebpf-profiler/support/usdt" + "github.com/parca-dev/usdt" ) // x86_64 register name to ID mapping diff --git a/libpf/pfelf/usdt_args_test.go b/libpf/pfelf/usdt_args_test.go index 0c71244f8..fefbef4bf 100644 --- a/libpf/pfelf/usdt_args_test.go +++ b/libpf/pfelf/usdt_args_test.go @@ -7,7 +7,7 @@ import ( "runtime" "testing" - "go.opentelemetry.io/ebpf-profiler/support/usdt" + "github.com/parca-dev/usdt" ) // Common test cases that work on all architectures diff --git a/processmanager/ebpf/ebpf.go b/processmanager/ebpf/ebpf.go index c83f55649..0a929aad4 100644 --- a/processmanager/ebpf/ebpf.go +++ b/processmanager/ebpf/ebpf.go @@ -17,6 +17,7 @@ import ( cebpf "github.com/cilium/ebpf" "github.com/cilium/ebpf/features" "github.com/cilium/ebpf/link" + "github.com/parca-dev/usdt" "go.opentelemetry.io/ebpf-profiler/internal/log" "go.opentelemetry.io/ebpf-profiler/tracer/types" "golang.org/x/exp/constraints" @@ -164,77 +165,8 @@ func LoadMaps(ctx context.Context, includeTracers types.IncludedTracers, maps ma return impl, nil } -type linkCloser struct { - unloadLink []link.Link - unloadSpecIDs []uint32 // spec IDs to delete when unload happens - specMap *cebpf.Map // reference to the spec map for cleanup -} - -// populateUSDTSpecMaps parses USDT probe arguments and populates the BPF spec maps. -// It returns the assigned spec IDs for each probe. -// If a probe has no arguments or parsing fails, it receives spec ID 0. 
-func populateUSDTSpecMaps(probes []pfelf.USDTProbe, specMap *cebpf.Map, startSpecID uint32) ([]uint32, error) { - specIDs := make([]uint32, len(probes)) - currentSpecID := startSpecID - - for i, probe := range probes { - if probe.Arguments == "" { - // No arguments, use spec ID 0 - specIDs[i] = 0 - continue - } - - // Parse the argument specification - spec, err := pfelf.ParseUSDTArguments(probe.Arguments) - if err != nil { - log.Warnf("Failed to parse USDT arguments for %s:%s (%s): %v", - probe.Provider, probe.Name, probe.Arguments, err) - specIDs[i] = 0 - continue - } - - // Assign a spec ID - specID := currentSpecID - currentSpecID++ - specIDs[i] = specID - - // Store the spec in the map - if err := specMap.Put(unsafe.Pointer(&specID), pfelf.USDTSpecToBytes(spec)); err != nil { - return nil, fmt.Errorf("failed to store USDT spec for %s:%s: %w", - probe.Provider, probe.Name, err) - } - } - - return specIDs, nil -} - -func (lc *linkCloser) Unload() error { - var errs []error - if lc.unloadLink != nil { - for _, l := range lc.unloadLink { - if err := l.Close(); err != nil { - errs = append(errs, err) - } - } - } - // Clean up spec IDs associated with unload - if lc.specMap != nil && len(lc.unloadSpecIDs) > 0 { - for _, specID := range lc.unloadSpecIDs { - if specID != 0 { - if err := lc.specMap.Delete(unsafe.Pointer(&specID)); err != nil { - log.Warnf("Failed to delete spec ID %d from map: %v", specID, err) - errs = append(errs, err) - } else { - log.Debugf("Deleted spec ID %d from map during unload", specID) - } - } - } - } - return errors.Join(errs...) -} - // deleteSpecIDs removes USDT spec map entries that won't be cleaned up -// by a LinkCloser (e.g. when multi-probe attach fails before returning a link). +// by a ProbeLinks (e.g. when multi-probe attach fails before returning a link). 
func (impl *ebpfMapsImpl) deleteSpecIDs(specIDs []uint32) { for _, specID := range specIDs { if specID != 0 { @@ -252,12 +184,10 @@ func (impl *ebpfMapsImpl) deleteSpecIDs(specIDs []uint32) { // function automatically falls back to single-shot per-probe attachment. func (impl *ebpfMapsImpl) AttachUSDTProbes(pid libpf.PID, path, multiProgName string, probes []pfelf.USDTProbe, cookies []uint64, singleProgNames []string) (interpreter.LinkCloser, error) { - useMulti := util.HasMultiUprobeSupport() containerPath := fmt.Sprintf("/proc/%d/root/%s", pid, path) exe, err := link.OpenExecutable(containerPath) if err != nil { - // The upstack code will swallow file not found errors so drop a crumb. log.Warnf("failed to open executable in AttachUSDTProbes %v", err) return nil, err } @@ -266,71 +196,37 @@ func (impl *ebpfMapsImpl) AttachUSDTProbes(pid libpf.PID, path, multiProgName st impl.userProgs = make(map[string]*cebpf.Program) } - // Parse USDT arguments and populate spec maps using the helper - var specIDs []uint32 - // Get the starting spec ID and update nextSpecID under lock + // Reserve spec IDs under lock. 
startSpecID := func() uint32 { impl.specIDLock.Lock() defer impl.specIDLock.Unlock() - specID := impl.nextSpecID + id := impl.nextSpecID impl.nextSpecID += uint32(len(probes)) - return specID + return id }() - // Populate USDT spec maps directly - specIDs, err = populateUSDTSpecMaps(probes, impl.usdtSpecsMap, startSpecID) + specIDs, err := usdt.PopulateSpecMap(impl.usdtSpecsMap, probes, startSpecID) if err != nil { return nil, fmt.Errorf("failed to populate USDT spec maps: %w", err) } - names := make([]string, 0, len(probes)) - addresses := make([]uint64, 0, len(probes)) - offsets := make([]uint64, 0, len(probes)) - for _, p := range probes { - names = append(names, p.Name) - addresses = append(addresses, p.Location) - offsets = append(offsets, p.SemaphoreOffset) - } - - // Merge spec IDs (high 32 bits) with user cookies (low 32 bits) - // BPF cookie format: [spec_id (32 bits) | user_cookie (32 bits)] - var finalCookies []uint64 - if len(specIDs) > 0 { - finalCookies = make([]uint64, len(specIDs)) - for i, specID := range specIDs { - // Spec ID goes in high 32 bits - finalCookies[i] = uint64(specID) << 32 - // If user provided cookies, merge them into low 32 bits - if cookies != nil && i < len(cookies) { - userCookie := uint32(cookies[i] & 0xFFFFFFFF) - finalCookies[i] |= uint64(userCookie) - } - } - // Note: IP-to-spec-ID map is already populated by PopulateUSDTSpecMaps - } else if cookies != nil { - // No spec IDs, just use user cookies in low 32 bits - finalCookies = make([]uint64, len(cookies)) - for i, cookie := range cookies { - finalCookies[i] = cookie & 0xFFFFFFFF - } - } + finalCookies := usdt.MergeCookies(specIDs, cookies) // Try multi-probe first if the kernel supports it and a dispatcher was given. 
- if multiProgName != "" && useMulti { + if multiProgName != "" && util.HasMultiUprobeSupport() { lc, multiErr := impl.attachMultiProbe(exe, path, pid, multiProgName, - names, addresses, offsets, finalCookies, specIDs) + probes, finalCookies, specIDs) if multiErr == nil { return lc, nil } if singleProgNames == nil { - // Clean up spec IDs that won't be freed by a LinkCloser. impl.deleteSpecIDs(specIDs) return nil, multiErr } log.Warnf("multi-probe attach failed (%v), falling back to single-shot", multiErr) } - // Single-shot mode: one program per probe (direct path or fallback). + // Single-shot mode: one program per probe. if singleProgNames == nil { return nil, fmt.Errorf("singleProgNames required when multi-probe not available") } @@ -352,48 +248,20 @@ func (impl *ebpfMapsImpl) AttachUSDTProbes(pid libpf.PID, path, multiProgName st progs[i] = prog } - var links []link.Link - for i, probe := range probes { - prog := progs[i] - if prog == nil { - for _, l := range links { - l.Close() - } - return nil, fmt.Errorf("program %d is nil for probe %s", i, probe.Name) - } - - uprobeOpts := &link.UprobeOptions{ - Address: probe.Location, - RefCtrOffset: probe.SemaphoreOffset, - } - if finalCookies != nil && i < len(finalCookies) { - uprobeOpts.Cookie = finalCookies[i] - } - - l, err := exe.Uprobe(probe.Name, prog, uprobeOpts) - if err != nil { - for _, lnk := range links { - lnk.Close() - } - return nil, fmt.Errorf("failed to attach USDT probe %s at location 0x%x: %w", - probe.Name, probe.Location, err) - } - links = append(links, l) + pl, err := usdt.AttachUprobes(exe, impl.usdtSpecsMap, probes, progs, finalCookies) + if err != nil { + return nil, err } - log.Infof("Attached %d individual probes to %s in PID %d", len(links), path, pid) - return &linkCloser{ - unloadLink: links, - unloadSpecIDs: specIDs, - specMap: impl.usdtSpecsMap, - }, nil + log.Infof("Attached %d individual probes to %s in PID %d", len(probes), path, pid) + return pl, nil } // attachMultiProbe loads 
the multi-probe dispatcher and attaches all probes // via UprobeMulti. func (impl *ebpfMapsImpl) attachMultiProbe( exe *link.Executable, path string, pid libpf.PID, multiProgName string, - names []string, addresses, offsets, finalCookies []uint64, + probes []pfelf.USDTProbe, finalCookies []uint64, specIDs []uint32, ) (interpreter.LinkCloser, error) { prog := impl.userProgs[multiProgName] @@ -404,6 +272,15 @@ func (impl *ebpfMapsImpl) attachMultiProbe( prog = impl.userProgs[multiProgName] } + names := make([]string, len(probes)) + addresses := make([]uint64, len(probes)) + offsets := make([]uint64, len(probes)) + for i, p := range probes { + names[i] = p.Name + addresses[i] = p.Location + offsets[i] = p.SemaphoreOffset + } + lnk, err := exe.UprobeMulti(names, prog, &link.UprobeMultiOptions{ Addresses: addresses, RefCtrOffsets: offsets, @@ -415,11 +292,7 @@ func (impl *ebpfMapsImpl) attachMultiProbe( } log.Infof("Attached probe %s to usdt %s in PID %d", multiProgName, path, pid) - return &linkCloser{ - unloadLink: []link.Link{lnk}, - unloadSpecIDs: specIDs, - specMap: impl.usdtSpecsMap, - }, nil + return &usdt.ProbeLinks{Links: []link.Link{lnk}, SpecIDs: specIDs, SpecMap: impl.usdtSpecsMap}, nil } // UpdateProgArray loads a USDT eBPF program by name and inserts it into @@ -533,7 +406,7 @@ func (impl *ebpfMapsImpl) AttachUprobe(pid libpf.PID, path string, offset uint64 path, offset, err) } log.Infof("Attached uprobe %s to %s at offset 0x%x in PID %d", progName, path, offset, pid) - return &linkCloser{unloadLink: []link.Link{lnk}}, nil + return &usdt.ProbeLinks{Links: []link.Link{lnk}}, nil } func (impl *ebpfMapsImpl) CoredumpTest() bool { diff --git a/support/ebpf/Makefile b/support/ebpf/Makefile index 1c5f60e8a..d616176ba 100644 --- a/support/ebpf/Makefile +++ b/support/ebpf/Makefile @@ -29,13 +29,37 @@ else TARGET_FLAGS = -target x86_64-linux-gnu endif +# Resolve the path to the parca-dev/usdt eBPF headers via the Go module +# cache (honors any `replace` directive 
in go.mod). The headers +# (usdt_defs.h, usdt_args.h) live there as the single source of truth and +# are not duplicated in this repo. +# +# Lazy assignment (`=`, not `:=`): only computed when an actual compile +# rule expands FLAGS, so `clean`/`format`/`lint` don't trigger it. We +# `go mod download` first because `go list -m -f '{{.Dir}}'` does not +# auto-download a specific module path on a cold cache. +USDT_HEADERS = $(shell cd $(CURDIR)/../.. && \ + go mod download github.com/parca-dev/usdt >/dev/null && \ + go list -m -f '{{.Dir}}' github.com/parca-dev/usdt)/ebpf + # Use -g to generate the btf section in the resulting binary. +# +# We remap two source roots to stable strings so the compiled .o (and the +# linked tracer.ebpf.* blob, since BTF/.BTF.ext survive `--strip-debug`) +# are byte-identical regardless of where the build happens: +# * $(CURDIR) — this directory (support/ebpf), already remapped. +# * $(USDT_HEADERS) — the parca-dev/usdt module's ebpf/ headers, which +# live in the Go module cache (different absolute +# path locally vs. in CI). Without this remap, the +# absolute paths of usdt_defs.h/usdt_args.h get +# baked into BTF and the blob diffs across machines. 
FLAGS=$(TARGET_FLAGS) -g \ -fno-jump-tables \ -nostdlib \ -nostdinc \ -ffreestanding \ -O2 -emit-llvm -c $< \ + -I$(USDT_HEADERS) \ -Wall -Wextra -Werror \ -Wno-address-of-packed-member \ -Wno-unused-label \ @@ -44,7 +68,9 @@ FLAGS=$(TARGET_FLAGS) -g \ -D__SOURCE_DATE_EPOCH__=0 \ -std=gnu17 \ -Xclang -fdebug-prefix-map=$(CURDIR)=$(REPRODUCIBLE_PREFIX) \ - -Xclang -fmacro-prefix-map=$(CURDIR)=$(REPRODUCIBLE_PREFIX) + -Xclang -fmacro-prefix-map=$(CURDIR)=$(REPRODUCIBLE_PREFIX) \ + -Xclang -fdebug-prefix-map=$(USDT_HEADERS)=usdt \ + -Xclang -fmacro-prefix-map=$(USDT_HEADERS)=usdt SRCS := $(wildcard *.ebpf.c) OBJS := $(SRCS:.c=.$(TARGET_ARCH).o) diff --git a/support/ebpf/tracer.ebpf.amd64 b/support/ebpf/tracer.ebpf.amd64 index 057d40ce0..44d385d30 100644 Binary files a/support/ebpf/tracer.ebpf.amd64 and b/support/ebpf/tracer.ebpf.amd64 differ diff --git a/support/ebpf/tracer.ebpf.arm64 b/support/ebpf/tracer.ebpf.arm64 index fa8da5b80..897a7017a 100644 Binary files a/support/ebpf/tracer.ebpf.arm64 and b/support/ebpf/tracer.ebpf.arm64 differ diff --git a/support/ebpf/usdt.ebpf.c b/support/ebpf/usdt.ebpf.c index a75ae3fc3..8dfaef427 100644 --- a/support/ebpf/usdt.ebpf.c +++ b/support/ebpf/usdt.ebpf.c @@ -2,7 +2,7 @@ #include "tracemgmt.h" #include "types.h" -#include "usdt.h" +#include "usdt_defs.h" #ifndef BPF_USDT_MAX_SPEC_CNT #define BPF_USDT_MAX_SPEC_CNT 256 diff --git a/support/ebpf/usdt.h b/support/ebpf/usdt.h deleted file mode 100644 index 7cb23687c..000000000 --- a/support/ebpf/usdt.h +++ /dev/null @@ -1,95 +0,0 @@ -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -#ifndef OPTI_USDT_H -#define OPTI_USDT_H - -#include "types.h" - -// USDT argument specification structures -enum bpf_usdt_arg_type { - BPF_USDT_ARG_CONST, - BPF_USDT_ARG_REG, - BPF_USDT_ARG_REG_DEREF, -}; - -// Register IDs -enum bpf_usdt_register { - BPF_USDT_REG_NONE = 0, - - // x86_64 registers (1-17) - BPF_USDT_REG_RAX = 1, - BPF_USDT_REG_RBX = 2, - BPF_USDT_REG_RCX 
= 3, - BPF_USDT_REG_RDX = 4, - BPF_USDT_REG_RSI = 5, - BPF_USDT_REG_RDI = 6, - BPF_USDT_REG_RBP = 7, - BPF_USDT_REG_RSP = 8, - BPF_USDT_REG_R8 = 9, - BPF_USDT_REG_R9 = 10, - BPF_USDT_REG_R10 = 11, - BPF_USDT_REG_R11 = 12, - BPF_USDT_REG_R12 = 13, - BPF_USDT_REG_R13 = 14, - BPF_USDT_REG_R14 = 15, - BPF_USDT_REG_R15 = 16, - BPF_USDT_REG_RIP = 17, - - // ARM64 registers (32-64) - BPF_USDT_REG_X0 = 32, - BPF_USDT_REG_X1 = 33, - BPF_USDT_REG_X2 = 34, - BPF_USDT_REG_X3 = 35, - BPF_USDT_REG_X4 = 36, - BPF_USDT_REG_X5 = 37, - BPF_USDT_REG_X6 = 38, - BPF_USDT_REG_X7 = 39, - BPF_USDT_REG_X8 = 40, - BPF_USDT_REG_X9 = 41, - BPF_USDT_REG_X10 = 42, - BPF_USDT_REG_X11 = 43, - BPF_USDT_REG_X12 = 44, - BPF_USDT_REG_X13 = 45, - BPF_USDT_REG_X14 = 46, - BPF_USDT_REG_X15 = 47, - BPF_USDT_REG_X16 = 48, - BPF_USDT_REG_X17 = 49, - BPF_USDT_REG_X18 = 50, - BPF_USDT_REG_X19 = 51, - BPF_USDT_REG_X20 = 52, - BPF_USDT_REG_X21 = 53, - BPF_USDT_REG_X22 = 54, - BPF_USDT_REG_X23 = 55, - BPF_USDT_REG_X24 = 56, - BPF_USDT_REG_X25 = 57, - BPF_USDT_REG_X26 = 58, - BPF_USDT_REG_X27 = 59, - BPF_USDT_REG_X28 = 60, - BPF_USDT_REG_X29 = 61, // FP - BPF_USDT_REG_X30 = 62, // LR - BPF_USDT_REG_SP = 63, - BPF_USDT_REG_PC = 64, -}; - -// USDT argument specification structures -#define BPF_USDT_MAX_ARG_CNT 12 - -struct bpf_usdt_arg_spec { - u64 val_off; // Constant value OR memory offset from register - u32 arg_type; // CONST, REG, or REG_DEREF (enum bpf_usdt_arg_type) - u8 reg_id; // Register ID (enum bpf_usdt_register) - bool arg_signed; // Whether argument is signed - s8 arg_bitshift; // Bits to shift for size adjustment (64 - arg_sz*8) - bool arg_is_float; // Whether argument is floating-point - u8 _pad[1]; // Padding for alignment -}; - -struct bpf_usdt_spec { - struct bpf_usdt_arg_spec args[BPF_USDT_MAX_ARG_CNT]; - u64 usdt_cookie; - s16 arg_cnt; - u8 _pad[6]; // Padding for alignment -}; - -#endif // OPTI_USDT_H diff --git a/support/ebpf/usdt_args.h b/support/ebpf/usdt_args.h deleted file mode 100644 
index d35a912dc..000000000 --- a/support/ebpf/usdt_args.h +++ /dev/null @@ -1,244 +0,0 @@ -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -#ifndef OPTI_USDT_ARGS_H -#define OPTI_USDT_ARGS_H - -#include "bpfdefs.h" -#include "usdt.h" - -// Forward declarations for USDT maps (defined in usdt.ebpf.c) -extern struct usdt_specs_t __bpf_usdt_specs; - -// Helper to get the offset into pt_regs for a given register ID -// This returns the byte offset that can be used with bpf_probe_read_kernel -static EBPF_INLINE int __bpf_usdt_get_reg_off(u8 reg_id) -{ -#if defined(__x86_64__) - switch (reg_id) { - case BPF_USDT_REG_RAX: return __builtin_offsetof(struct pt_regs, ax); - case BPF_USDT_REG_RBX: return __builtin_offsetof(struct pt_regs, bx); - case BPF_USDT_REG_RCX: return __builtin_offsetof(struct pt_regs, cx); - case BPF_USDT_REG_RDX: return __builtin_offsetof(struct pt_regs, dx); - case BPF_USDT_REG_RSI: return __builtin_offsetof(struct pt_regs, si); - case BPF_USDT_REG_RDI: return __builtin_offsetof(struct pt_regs, di); - case BPF_USDT_REG_RBP: return __builtin_offsetof(struct pt_regs, bp); - case BPF_USDT_REG_RSP: return __builtin_offsetof(struct pt_regs, sp); - case BPF_USDT_REG_R8: return __builtin_offsetof(struct pt_regs, r8); - case BPF_USDT_REG_R9: return __builtin_offsetof(struct pt_regs, r9); - case BPF_USDT_REG_R10: return __builtin_offsetof(struct pt_regs, r10); - case BPF_USDT_REG_R11: return __builtin_offsetof(struct pt_regs, r11); - case BPF_USDT_REG_R12: return __builtin_offsetof(struct pt_regs, r12); - case BPF_USDT_REG_R13: return __builtin_offsetof(struct pt_regs, r13); - case BPF_USDT_REG_R14: return __builtin_offsetof(struct pt_regs, r14); - case BPF_USDT_REG_R15: return __builtin_offsetof(struct pt_regs, r15); - case BPF_USDT_REG_RIP: return __builtin_offsetof(struct pt_regs, ip); - default: return -1; - } -#elif defined(__aarch64__) - switch (reg_id) { - case BPF_USDT_REG_X0: return __builtin_offsetof(struct pt_regs, 
regs[0]); - case BPF_USDT_REG_X1: return __builtin_offsetof(struct pt_regs, regs[1]); - case BPF_USDT_REG_X2: return __builtin_offsetof(struct pt_regs, regs[2]); - case BPF_USDT_REG_X3: return __builtin_offsetof(struct pt_regs, regs[3]); - case BPF_USDT_REG_X4: return __builtin_offsetof(struct pt_regs, regs[4]); - case BPF_USDT_REG_X5: return __builtin_offsetof(struct pt_regs, regs[5]); - case BPF_USDT_REG_X6: return __builtin_offsetof(struct pt_regs, regs[6]); - case BPF_USDT_REG_X7: return __builtin_offsetof(struct pt_regs, regs[7]); - case BPF_USDT_REG_X8: return __builtin_offsetof(struct pt_regs, regs[8]); - case BPF_USDT_REG_X9: return __builtin_offsetof(struct pt_regs, regs[9]); - case BPF_USDT_REG_X10: return __builtin_offsetof(struct pt_regs, regs[10]); - case BPF_USDT_REG_X11: return __builtin_offsetof(struct pt_regs, regs[11]); - case BPF_USDT_REG_X12: return __builtin_offsetof(struct pt_regs, regs[12]); - case BPF_USDT_REG_X13: return __builtin_offsetof(struct pt_regs, regs[13]); - case BPF_USDT_REG_X14: return __builtin_offsetof(struct pt_regs, regs[14]); - case BPF_USDT_REG_X15: return __builtin_offsetof(struct pt_regs, regs[15]); - case BPF_USDT_REG_X16: return __builtin_offsetof(struct pt_regs, regs[16]); - case BPF_USDT_REG_X17: return __builtin_offsetof(struct pt_regs, regs[17]); - case BPF_USDT_REG_X18: return __builtin_offsetof(struct pt_regs, regs[18]); - case BPF_USDT_REG_X19: return __builtin_offsetof(struct pt_regs, regs[19]); - case BPF_USDT_REG_X20: return __builtin_offsetof(struct pt_regs, regs[20]); - case BPF_USDT_REG_X21: return __builtin_offsetof(struct pt_regs, regs[21]); - case BPF_USDT_REG_X22: return __builtin_offsetof(struct pt_regs, regs[22]); - case BPF_USDT_REG_X23: return __builtin_offsetof(struct pt_regs, regs[23]); - case BPF_USDT_REG_X24: return __builtin_offsetof(struct pt_regs, regs[24]); - case BPF_USDT_REG_X25: return __builtin_offsetof(struct pt_regs, regs[25]); - case BPF_USDT_REG_X26: return __builtin_offsetof(struct 
pt_regs, regs[26]); - case BPF_USDT_REG_X27: return __builtin_offsetof(struct pt_regs, regs[27]); - case BPF_USDT_REG_X28: return __builtin_offsetof(struct pt_regs, regs[28]); - case BPF_USDT_REG_X29: return __builtin_offsetof(struct pt_regs, regs[29]); // FP - case BPF_USDT_REG_X30: return __builtin_offsetof(struct pt_regs, regs[30]); // LR - case BPF_USDT_REG_SP: return __builtin_offsetof(struct pt_regs, sp); - case BPF_USDT_REG_PC: return __builtin_offsetof(struct pt_regs, pc); - default: return -1; - } -#else - #error "Unsupported architecture for USDT" -#endif -} - -// Helper to read register value from pt_regs based on register ID -// Uses bpf_probe_read_kernel to safely read from the context pointer, -// avoiding BPF verifier issues with modified context pointers -static EBPF_INLINE int __bpf_usdt_get_reg_val(struct pt_regs *ctx, u8 reg_id, unsigned long *val) -{ - int reg_off = __bpf_usdt_get_reg_off(reg_id); - if (reg_off < 0) - return -1; - - return bpf_probe_read_kernel(val, sizeof(*val), (void *)ctx + reg_off); -} - -// Helper function to get spec_id from context -// The BPF cookie is split: high 32 bits = spec ID, low 32 bits = user cookie -static EBPF_INLINE int __bpf_usdt_spec_id(struct pt_regs *ctx) -{ - u64 cookie = bpf_get_attach_cookie(ctx); - return (u32)(cookie >> 32); -} - -// Helper function to get user cookie from context -// The BPF cookie is split: high 32 bits = spec ID, low 32 bits = user cookie -static EBPF_INLINE UNUSED u32 __bpf_usdt_cookie(struct pt_regs *ctx) -{ - u64 cookie = bpf_get_attach_cookie(ctx); - return (u32)(cookie & 0xFFFFFFFF); -} - -// libbpf-compatible function to fetch USDT arguments -static EBPF_INLINE UNUSED int bpf_usdt_arg(struct pt_regs *ctx, u64 arg_num, long *res) -{ - struct bpf_usdt_spec *spec; - struct bpf_usdt_arg_spec *arg_spec; - unsigned long val; - int err, spec_id; - - *res = 0; - - spec_id = __bpf_usdt_spec_id(ctx); - if (spec_id < 0) - return -1; - - spec = bpf_map_lookup_elem(&__bpf_usdt_specs, 
&spec_id); - if (!spec) - return -1; - - if (arg_num >= BPF_USDT_MAX_ARG_CNT || arg_num >= spec->arg_cnt) - return -1; - - arg_spec = &spec->args[arg_num]; - - // Read all fields into local variables to help BPF verifier - u32 arg_type = arg_spec->arg_type; - u64 val_off = arg_spec->val_off; - u8 reg_id = arg_spec->reg_id; - bool arg_signed = arg_spec->arg_signed; - s8 arg_bitshift = arg_spec->arg_bitshift; - - switch (arg_type) { - case BPF_USDT_ARG_CONST: - // Arg is just a constant ("-4@$-9" in USDT arg spec) - val = val_off; - break; - case BPF_USDT_ARG_REG: - // Arg is in a register (e.g, "8@%rax" in USDT arg spec) - err = __bpf_usdt_get_reg_val(ctx, reg_id, &val); - if (err) - return err; - break; - case BPF_USDT_ARG_REG_DEREF: - // Arg is in memory addressed by register, plus some offset - err = __bpf_usdt_get_reg_val(ctx, reg_id, &val); - if (err) - return err; - err = bpf_probe_read_user(&val, sizeof(val), (void *)val + val_off); - if (err) - return err; -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - val >>= arg_bitshift; -#endif - break; - default: return -1; - } - - // Cast arg from 1, 2, 4, or 8 bytes to final 8 byte size - val <<= arg_bitshift; - if (arg_signed) - val = ((long)val) >> arg_bitshift; - else - val = val >> arg_bitshift; - *res = val; - return 0; -} - -// clang-format off -// Individual argument extraction macros -// Usage: s32 arg0 = bpf_usdt_arg0(ctx); -#define bpf_usdt_arg0(ctx) ({ long _arg; bpf_usdt_arg(ctx, 0, &_arg); _arg; }) -#define bpf_usdt_arg1(ctx) ({ long _arg; bpf_usdt_arg(ctx, 1, &_arg); _arg; }) -#define bpf_usdt_arg2(ctx) ({ long _arg; bpf_usdt_arg(ctx, 2, &_arg); _arg; }) -#define bpf_usdt_arg3(ctx) ({ long _arg; bpf_usdt_arg(ctx, 3, &_arg); _arg; }) -#define bpf_usdt_arg4(ctx) ({ long _arg; bpf_usdt_arg(ctx, 4, &_arg); _arg; }) -#define bpf_usdt_arg5(ctx) ({ long _arg; bpf_usdt_arg(ctx, 5, &_arg); _arg; }) -#define bpf_usdt_arg6(ctx) ({ long _arg; bpf_usdt_arg(ctx, 6, &_arg); _arg; }) -#define bpf_usdt_arg7(ctx) ({ long 
_arg; bpf_usdt_arg(ctx, 7, &_arg); _arg; }) -#define bpf_usdt_arg8(ctx) ({ long _arg; bpf_usdt_arg(ctx, 8, &_arg); _arg; }) -#define bpf_usdt_arg9(ctx) ({ long _arg; bpf_usdt_arg(ctx, 9, &_arg); _arg; }) -#define bpf_usdt_arg10(ctx) ({ long _arg; bpf_usdt_arg(ctx, 10, &_arg); _arg; }) -#define bpf_usdt_arg11(ctx) ({ long _arg; bpf_usdt_arg(ctx, 11, &_arg); _arg; }) - -// The rest of this code is from libbpf -#ifndef ___bpf_concat -#define ___bpf_concat(a, b) a##b -#endif -#ifndef ___bpf_apply -#define ___bpf_apply(fn, n) ___bpf_concat(fn, n) -#endif -#ifndef ___bpf_nth -#define ___bpf_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _a, _b, _c, N, ...) N -#endif -#ifndef ___bpf_narg -#define ___bpf_narg(...) \ - ___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) -#endif - -#define ___bpf_usdt_args0() ctx -#define ___bpf_usdt_args1(x) ___bpf_usdt_args0(), ({ long _x; bpf_usdt_arg(ctx, 0, &_x); _x; }) -#define ___bpf_usdt_args2(x, args...) ___bpf_usdt_args1(args), ({ long _x; bpf_usdt_arg(ctx, 1, &_x); _x; }) -#define ___bpf_usdt_args3(x, args...) ___bpf_usdt_args2(args), ({ long _x; bpf_usdt_arg(ctx, 2, &_x); _x; }) -#define ___bpf_usdt_args4(x, args...) ___bpf_usdt_args3(args), ({ long _x; bpf_usdt_arg(ctx, 3, &_x); _x; }) -#define ___bpf_usdt_args5(x, args...) ___bpf_usdt_args4(args), ({ long _x; bpf_usdt_arg(ctx, 4, &_x); _x; }) -#define ___bpf_usdt_args6(x, args...) ___bpf_usdt_args5(args), ({ long _x; bpf_usdt_arg(ctx, 5, &_x); _x; }) -#define ___bpf_usdt_args7(x, args...) ___bpf_usdt_args6(args), ({ long _x; bpf_usdt_arg(ctx, 6, &_x); _x; }) -#define ___bpf_usdt_args8(x, args...) ___bpf_usdt_args7(args), ({ long _x; bpf_usdt_arg(ctx, 7, &_x); _x; }) -#define ___bpf_usdt_args9(x, args...) ___bpf_usdt_args8(args), ({ long _x; bpf_usdt_arg(ctx, 8, &_x); _x; }) -#define ___bpf_usdt_args10(x, args...) ___bpf_usdt_args9(args), ({ long _x; bpf_usdt_arg(ctx, 9, &_x); _x; }) -#define ___bpf_usdt_args11(x, args...) 
___bpf_usdt_args10(args), ({ long _x; bpf_usdt_arg(ctx, 10, &_x); _x; }) -#define ___bpf_usdt_args12(x, args...) ___bpf_usdt_args11(args), ({ long _x; bpf_usdt_arg(ctx, 11, &_x); _x; }) -#define ___bpf_usdt_args(args...) ___bpf_apply(___bpf_usdt_args, ___bpf_narg(args))(args) - -/* - * BPF_USDT serves the same purpose for USDT handlers as BPF_PROG for - * tp_btf/fentry/fexit BPF programs and BPF_KPROBE for kprobes. - * Original struct pt_regs * context is preserved as 'ctx' argument. - */ -#define BPF_USDT(name, args...) \ -name(struct pt_regs *ctx); \ -static EBPF_INLINE typeof(name(0)) \ -____##name(UNUSED struct pt_regs *ctx, ##args); \ -typeof(name(0)) name(struct pt_regs *ctx) \ -{ \ - _Pragma("GCC diagnostic push") \ - _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ - return ____##name(___bpf_usdt_args(args)); \ - _Pragma("GCC diagnostic pop") \ -} \ -static EBPF_INLINE typeof(name(0)) \ -____##name(UNUSED struct pt_regs *ctx, ##args) - -#define BPF_USDT_CALL(name, args...) \ - ____##name(___bpf_usdt_args(args)) - -// clang-format on - -#endif // OPTI_USDT_ARGS_H diff --git a/support/generate.sh b/support/generate.sh index 5d41e7c9d..8fbd398f9 100755 --- a/support/generate.sh +++ b/support/generate.sh @@ -29,25 +29,4 @@ fi # Clean up temporary files rm -rf _obj/ types_gen.go -# Generate support/usdt/types.go -echo "Generating support/usdt/types.go..." - -cd usdt - -# Generate types_gen.go without license (cgo adds its own header) -go tool cgo -godefs types_def.go > types_gen.go - -# Properly format the generated code -go fmt . - -if ! diff types_gen.go types.go; then - echo "Auto generated and existing code differ, please review and update support/usdt/types.go" - exit 1 -fi - -# Clean up temporary files -rm -rf _obj/ types_gen.go - -cd .. 
- echo "All type definitions are up to date" diff --git a/support/usdt/types.go b/support/usdt/types.go deleted file mode 100644 index c276e8be2..000000000 --- a/support/usdt/types.go +++ /dev/null @@ -1,88 +0,0 @@ -// Code generated by cmd/cgo -godefs; DO NOT EDIT. -// cgo -godefs types_def.go - -package usdt - -type ArgType = uint32 - -const ( - ArgConst = 0x0 - ArgReg = 0x1 - ArgRegDeref = 0x2 -) - -type Register = uint8 - -const ( - RegNone = 0x0 - - RegRax = 0x1 - RegRbx = 0x2 - RegRcx = 0x3 - RegRdx = 0x4 - RegRsi = 0x5 - RegRdi = 0x6 - RegRbp = 0x7 - RegRsp = 0x8 - RegR8 = 0x9 - RegR9 = 0xa - RegR10 = 0xb - RegR11 = 0xc - RegR12 = 0xd - RegR13 = 0xe - RegR14 = 0xf - RegR15 = 0x10 - RegRip = 0x11 - - RegX0 = 0x20 - RegX1 = 0x21 - RegX2 = 0x22 - RegX3 = 0x23 - RegX4 = 0x24 - RegX5 = 0x25 - RegX6 = 0x26 - RegX7 = 0x27 - RegX8 = 0x28 - RegX9 = 0x29 - RegX10 = 0x2a - RegX11 = 0x2b - RegX12 = 0x2c - RegX13 = 0x2d - RegX14 = 0x2e - RegX15 = 0x2f - RegX16 = 0x30 - RegX17 = 0x31 - RegX18 = 0x32 - RegX19 = 0x33 - RegX20 = 0x34 - RegX21 = 0x35 - RegX22 = 0x36 - RegX23 = 0x37 - RegX24 = 0x38 - RegX25 = 0x39 - RegX26 = 0x3a - RegX27 = 0x3b - RegX28 = 0x3c - RegX29 = 0x3d - RegX30 = 0x3e - RegSP = 0x3f - RegPC = 0x40 -) - -type ArgSpec struct { - Val_off uint64 - Arg_type uint32 - Reg_id uint8 - Arg_signed bool - Arg_bitshift int8 - Arg_is_float bool - X_pad [1]uint8 - Pad_cgo_0 [7]byte -} - -type Spec struct { - Args [12]ArgSpec - Usdt_cookie uint64 - Arg_cnt int16 - X_pad [6]uint8 -} diff --git a/support/usdt/types_def.go b/support/usdt/types_def.go deleted file mode 100644 index 8c1513277..000000000 --- a/support/usdt/types_def.go +++ /dev/null @@ -1,91 +0,0 @@ -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -//go:build ignore - -package usdt // import "go.opentelemetry.io/ebpf-profiler/support/usdt" - -/* -#include "../ebpf/usdt.h" -*/ -import "C" - -// ArgType represents the type of USDT argument (libbpf-compatible) -type ArgType = 
uint32 - -// Argument type constants -const ( - ArgConst = C.BPF_USDT_ARG_CONST - ArgReg = C.BPF_USDT_ARG_REG - ArgRegDeref = C.BPF_USDT_ARG_REG_DEREF -) - -// Register represents CPU registers that can be used in USDT args -type Register = uint8 - -// Register ID constants - x86_64 and ARM64 -const ( - RegNone = C.BPF_USDT_REG_NONE - - // x86_64 registers (1-17) - RegRax = C.BPF_USDT_REG_RAX - RegRbx = C.BPF_USDT_REG_RBX - RegRcx = C.BPF_USDT_REG_RCX - RegRdx = C.BPF_USDT_REG_RDX - RegRsi = C.BPF_USDT_REG_RSI - RegRdi = C.BPF_USDT_REG_RDI - RegRbp = C.BPF_USDT_REG_RBP - RegRsp = C.BPF_USDT_REG_RSP - RegR8 = C.BPF_USDT_REG_R8 - RegR9 = C.BPF_USDT_REG_R9 - RegR10 = C.BPF_USDT_REG_R10 - RegR11 = C.BPF_USDT_REG_R11 - RegR12 = C.BPF_USDT_REG_R12 - RegR13 = C.BPF_USDT_REG_R13 - RegR14 = C.BPF_USDT_REG_R14 - RegR15 = C.BPF_USDT_REG_R15 - RegRip = C.BPF_USDT_REG_RIP - - // ARM64 registers (32-64) - RegX0 = C.BPF_USDT_REG_X0 - RegX1 = C.BPF_USDT_REG_X1 - RegX2 = C.BPF_USDT_REG_X2 - RegX3 = C.BPF_USDT_REG_X3 - RegX4 = C.BPF_USDT_REG_X4 - RegX5 = C.BPF_USDT_REG_X5 - RegX6 = C.BPF_USDT_REG_X6 - RegX7 = C.BPF_USDT_REG_X7 - RegX8 = C.BPF_USDT_REG_X8 - RegX9 = C.BPF_USDT_REG_X9 - RegX10 = C.BPF_USDT_REG_X10 - RegX11 = C.BPF_USDT_REG_X11 - RegX12 = C.BPF_USDT_REG_X12 - RegX13 = C.BPF_USDT_REG_X13 - RegX14 = C.BPF_USDT_REG_X14 - RegX15 = C.BPF_USDT_REG_X15 - RegX16 = C.BPF_USDT_REG_X16 - RegX17 = C.BPF_USDT_REG_X17 - RegX18 = C.BPF_USDT_REG_X18 - RegX19 = C.BPF_USDT_REG_X19 - RegX20 = C.BPF_USDT_REG_X20 - RegX21 = C.BPF_USDT_REG_X21 - RegX22 = C.BPF_USDT_REG_X22 - RegX23 = C.BPF_USDT_REG_X23 - RegX24 = C.BPF_USDT_REG_X24 - RegX25 = C.BPF_USDT_REG_X25 - RegX26 = C.BPF_USDT_REG_X26 - RegX27 = C.BPF_USDT_REG_X27 - RegX28 = C.BPF_USDT_REG_X28 - RegX29 = C.BPF_USDT_REG_X29 - RegX30 = C.BPF_USDT_REG_X30 - RegSP = C.BPF_USDT_REG_SP - RegPC = C.BPF_USDT_REG_PC -) - -// ArgSpec represents a single USDT argument specification -// Must match struct bpf_usdt_arg_spec in ../ebpf/usdt.h -type 
ArgSpec C.struct_bpf_usdt_arg_spec - -// Spec represents all arguments for a USDT probe (libbpf-compatible) -// Must match struct bpf_usdt_spec in ../ebpf/usdt.h -type Spec C.struct_bpf_usdt_spec diff --git a/test/cudaverify/cuda_verifier_test.go b/test/cudaverify/cuda_verifier_test.go index 5e6dff7b0..337e64497 100644 --- a/test/cudaverify/cuda_verifier_test.go +++ b/test/cudaverify/cuda_verifier_test.go @@ -23,15 +23,32 @@ import ( var soPath = flag.String("so-path", "/libparcagpucupti.so", "path to libparcagpucupti.so") +func TestMain(m *testing.M) { + flag.Parse() + + if os.Getuid() == 0 { + rc := cInitParcaGPU(*soPath) + if rc != 0 { + os.Exit(1) + } + } + + code := m.Run() + + if os.Getuid() == 0 { + cCleanupParcaGPU() + } + + os.Exit(code) +} + // runEndToEnd exercises the full process-manager driven GPU probe attachment flow: // // 1. Start the full tracer pipeline (PID event processor, map monitors, profiling). -// 2. ForceProcessPID to trigger initial process sync — this causes the tracer to -// read our /proc/self/maps, discover libc, and attach the dlopen uprobe via rtld. -// 3. Wait until the dlopen uprobe is confirmed attached (metric increments). -// 4. dlopen libparcagpu — the dlopen uprobe fires, triggering a re-sync that -// discovers libparcagpu and automatically attaches the GPU USDT probes. -// 5. Verify GPU interpreter instance is attached, then simulate kernel launches +// 2. ForceProcessPID to trigger process sync — the tracer reads /proc/self/maps, +// discovers libc and libparcagpucupti.so (loaded in TestMain), and attaches +// the GPU USDT probes. +// 3. Verify GPU interpreter instance is attached, then simulate kernel launches // and check that timing events arrive on the perf buffer. 
func runEndToEnd(t *testing.T, multiProbe bool) { t.Helper() @@ -70,8 +87,7 @@ func runEndToEnd(t *testing.T, multiProbe bool) { return false }, 30*time.Second, 200*time.Millisecond, "process manager never synced our PID") - // Set up perf reader on the cuda_timing_events map BEFORE the dlopen so we - // don't miss any events. + // Set up perf reader on the cuda_timing_events map before simulation. timingMap := trc.GetEbpfMaps()["cuda_timing_events"] require.NotNil(t, timingMap, "cuda_timing_events map not found") @@ -79,14 +95,8 @@ func runEndToEnd(t *testing.T, multiProbe bool) { require.NoError(t, err, "perf.NewReader failed") defer reader.Close() - // dlopen libparcagpu — this fires the dlopen uprobe, which causes a PID - // re-sync. The process manager will discover the newly mapped .so, the GPU - // loader will find its USDT probes, and Attach will hook them up. - rc := cInitParcaGPU(*soPath) - require.Equal(t, 0, rc, "init_parcagpu (dlopen) failed") - defer cCleanupParcaGPU() - - // Speed up the re-sync after dlopen. + // libparcagpucupti.so was loaded in TestMain — ForceProcessPID will + // discover it from /proc/self/maps and attach the GPU USDT probes. trc.ForceProcessPID(pid) // Wait until the GPU interpreter instance appears, confirming the USDT diff --git a/test/cudaverify/mock_cupti.c b/test/cudaverify/mock_cupti.c index 244b51e19..8823c04ce 100644 --- a/test/cudaverify/mock_cupti.c +++ b/test/cudaverify/mock_cupti.c @@ -198,6 +198,18 @@ static void *soHandle = NULL; // init_parcagpu loads the .so, calls InitializeInjection, and extracts // the callback pointers that were registered via the mock CUPTI APIs. int init_parcagpu(const char *so_path) { + // Pre-load mock libcupti.so with RTLD_NODELETE so its state persists + // across test runs. InitializeInjection uses a singleton pattern and + // won't re-register CUPTI callbacks on subsequent calls, so the mock's + // globals from the first run must survive cleanup/reinit cycles. 
+ const char *cupti_dir = getenv("TRITON_CUPTI_LIB_PATH"); + char cupti_path[512]; + if (cupti_dir) + snprintf(cupti_path, sizeof(cupti_path), "%s/libcupti.so", cupti_dir); + else + snprintf(cupti_path, sizeof(cupti_path), "libcupti.so"); + dlopen(cupti_path, RTLD_LAZY | RTLD_NODELETE); + soHandle = dlopen(so_path, RTLD_NOW | RTLD_GLOBAL); if (!soHandle) { fprintf(stderr, "mock_cupti: dlopen failed: %s\n", dlerror()); @@ -217,10 +229,28 @@ int init_parcagpu(const char *so_path) { int rc = initFunc(); fprintf(stderr, "mock_cupti: InitializeInjection returned %d\n", rc); - // Extract callbacks set by InitializeInjection via our mock CUPTI. - parcagpuCallback = __cupti_runtime_api_callback; - bufferReqCallback = __cupti_buffer_requested_callback; - bufferCompCallback = __cupti_buffer_completed_callback; + // Extract callbacks registered by InitializeInjection via cuptiSubscribe. + // + // Two cases depending on the libparcagpucupti.so build: + // (a) New builds: no DT_NEEDED for libcupti — the library loads the mock + // libcupti.so via dlopen/dlsym, so callbacks live in the mock's globals. + // (b) Old builds: DT_NEEDED for libcupti.so.N — symbols resolve from the + // test binary's --export-dynamic, so callbacks live in our own globals. + // + // Check the mock library first, then fall back to our own copy. 
+ void *cupti_lib = dlopen(cupti_path, RTLD_LAZY); + if (cupti_lib) { + CUpti_CallbackFunc *p1 = dlsym(cupti_lib, "__cupti_runtime_api_callback"); + CUpti_BufferRequestFunc *p2 = dlsym(cupti_lib, "__cupti_buffer_requested_callback"); + CUpti_BufferCompletedFunc *p3 = dlsym(cupti_lib, "__cupti_buffer_completed_callback"); + if (p1 && *p1) parcagpuCallback = *p1; + if (p2 && *p2) bufferReqCallback = *p2; + if (p3 && *p3) bufferCompCallback = *p3; + dlclose(cupti_lib); + } + if (!parcagpuCallback) parcagpuCallback = __cupti_runtime_api_callback; + if (!bufferReqCallback) bufferReqCallback = __cupti_buffer_requested_callback; + if (!bufferCompCallback) bufferCompCallback = __cupti_buffer_completed_callback; if (!parcagpuCallback) { fprintf(stderr, "mock_cupti: parcagpuCuptiCallback is NULL\n"); diff --git a/test/distro-qemu/build-initramfs.sh b/test/distro-qemu/build-initramfs.sh index 4c2f2fc04..2c49322a4 100755 --- a/test/distro-qemu/build-initramfs.sh +++ b/test/distro-qemu/build-initramfs.sh @@ -100,10 +100,10 @@ copy_lib_deps() { cp "${BUILD_DIR}"/*.test "$ROOTFS_DIR/" cp "${PARCAGPU_DIR}/libparcagpucupti.so" "$ROOTFS_DIR/" -# Copy stub libcupti .so and libstdc++ into the RUNPATH so dlopen of -# libparcagpucupti.so can resolve its DT_NEEDED entries. -for stub in "${PARCAGPU_DIR}"/libcupti.so*; do - [ -f "$stub" ] && cp "$stub" "$ROOTFS_DIR/usr/local/cuda/lib64/" +# Copy mock CUPTI/CUDA libraries into the RUNPATH so dlopen of +# libparcagpucupti.so can resolve them at runtime. +for lib in "${PARCAGPU_DIR}"/libcupti.so* "${PARCAGPU_DIR}"/libcuda.so*; do + [ -f "$lib" ] && cp -a "$lib" "$ROOTFS_DIR/usr/local/cuda/lib64/" done LIBSTDCXX=$(find /lib* /usr/lib* -name 'libstdc++.so.6' 2>/dev/null | head -1) if [ -n "$LIBSTDCXX" ]; then @@ -155,6 +155,8 @@ mount -t debugfs debugfs /sys/kernel/debug 2>/dev/null || true export DEBUG_TEST=1 # Help the dynamic linker find libs in the CUDA RUNPATH. 
export LD_LIBRARY_PATH=/usr/local/cuda/lib64 +# Tell libparcagpucupti.so where to find the stub libcupti.so. +export TRITON_CUPTI_LIB_PATH=/usr/local/cuda/lib64 # Run the tests echo "" diff --git a/test/distro-qemu/download-parcagpu.sh b/test/distro-qemu/download-parcagpu.sh index 1aa0e2a93..1beb3fc1d 100755 --- a/test/distro-qemu/download-parcagpu.sh +++ b/test/distro-qemu/download-parcagpu.sh @@ -58,25 +58,45 @@ if [[ ! -f "${PARCAGPU_DIR}/libparcagpucupti.so" ]]; then ls -la "${PARCAGPU_DIR}/libparcagpucupti.so" fi -# Build a stub libcupti .so so that dlopen of libparcagpucupti.so succeeds -# without a real CUDA installation. The stub only needs to satisfy the -# DT_NEEDED file lookup — the actual CUPTI symbols are provided by -# mock_cupti.c in the test binary (exported via --export-dynamic). -CUPTI_SONAME=$(readelf -d "${PARCAGPU_DIR}/libparcagpucupti.so" \ - | sed -n 's/.*NEEDED.*\[\(libcupti\.so[^]]*\)\].*/\1/p') - -if [[ -n "${CUPTI_SONAME}" && ! -f "${PARCAGPU_DIR}/${CUPTI_SONAME}" ]]; then - STUB_C=$(mktemp --suffix=.c) - echo "void __cupti_stub(void){}" > "${STUB_C}" - - # Determine cross-compiler for the target arch. - case "$QEMU_ARCH" in - aarch64) STUB_CC="${CC:-aarch64-linux-gnu-gcc}" ;; - *) STUB_CC="${CC:-cc}" ;; - esac - - ${STUB_CC} -shared -o "${PARCAGPU_DIR}/${CUPTI_SONAME}" \ - -Wl,-soname,"${CUPTI_SONAME}" "${STUB_C}" - rm -f "${STUB_C}" - echo "✅ Built stub ${CUPTI_SONAME}" +# Build mock libcupti.so and libcuda.so from the parcagpu repo's test sources. +# These provide real mock implementations of all CUPTI/CUDA APIs that +# libparcagpucupti.so resolves via dlsym at runtime. +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +MOCK_HEADERS="${SCRIPT_DIR}/mock-cupti-headers" +PARCAGPU_REPO="parca-dev/parcagpu" + +# Determine compiler for the target arch. +if [ "$QEMU_ARCH" = "aarch64" ] && [ "$(uname -m)" != "aarch64" ]; then + STUB_CC="${CC:-aarch64-linux-gnu-gcc}" +else + STUB_CC="${CC:-cc}" +fi + +if [[ ! 
-f "${PARCAGPU_DIR}/libcupti.so" ]]; then + echo "Building mock libcupti.so from ${PARCAGPU_REPO}..." + MOCK_SRC=$(mktemp -d) + + # Download mock sources from the parcagpu repo. + for f in test/mock_cupti.c test/mock_cuda.c; do + curl -sL "https://raw.githubusercontent.com/${PARCAGPU_REPO}/main/${f}" \ + -o "${MOCK_SRC}/$(basename "$f")" + done + + # Build mock libcupti.so with our minimal type-definition headers. + ${STUB_CC} -shared -fPIC -o "${PARCAGPU_DIR}/libcupti.so" \ + -Wl,-soname,"libcupti.so" \ + -I"${MOCK_HEADERS}" \ + "${MOCK_SRC}/mock_cupti.c" + echo "✅ Built mock libcupti.so" + + # Build mock libcuda.so. + ${STUB_CC} -shared -fPIC -o "${PARCAGPU_DIR}/libcuda.so" \ + -Wl,-soname,"libcuda.so.1" \ + -I"${MOCK_HEADERS}" \ + "${MOCK_SRC}/mock_cuda.c" + # Triton's Proton looks for the versioned soname. + ln -sf libcuda.so "${PARCAGPU_DIR}/libcuda.so.1" + echo "✅ Built mock libcuda.so" + + rm -rf "${MOCK_SRC}" fi diff --git a/test/distro-qemu/mock-cupti-headers/cuda.h b/test/distro-qemu/mock-cupti-headers/cuda.h new file mode 100644 index 000000000..b03eb480f --- /dev/null +++ b/test/distro-qemu/mock-cupti-headers/cuda.h @@ -0,0 +1,12 @@ +// Minimal CUDA Driver API type definitions for building mock libraries. +// Only the types referenced by parcagpu's test/mock_cuda.c are needed. +#ifndef MOCK_CUDA_H +#define MOCK_CUDA_H + +typedef int CUresult; +#define CUDA_SUCCESS 0 +#define CUDA_ERROR_INVALID_VALUE 1 + +typedef void *CUcontext; + +#endif // MOCK_CUDA_H diff --git a/test/distro-qemu/mock-cupti-headers/cupti.h b/test/distro-qemu/mock-cupti-headers/cupti.h new file mode 100644 index 000000000..d3fe9750f --- /dev/null +++ b/test/distro-qemu/mock-cupti-headers/cupti.h @@ -0,0 +1,108 @@ +// Minimal CUPTI type definitions for building mock libraries. +// Only the types referenced by parcagpu's test/mock_cupti.c are needed. +// Struct layouts must be ABI-compatible with the real CUPTI headers. 
+#ifndef MOCK_CUPTI_H +#define MOCK_CUPTI_H + +#include <stddef.h> +#include <stdint.h> +#include "cuda.h" + +typedef int CUptiResult; +#define CUPTI_SUCCESS 0 +#define CUPTI_ERROR_MAX_LIMIT_REACHED 21 +#define CUPTI_ERROR_INVALID_KIND 46 + +typedef void *CUpti_SubscriberHandle; + +typedef enum { + CUPTI_CB_DOMAIN_RUNTIME_API = 2, + CUPTI_CB_DOMAIN_DRIVER_API = 3, + CUPTI_CB_DOMAIN_RESOURCE = 4, +} CUpti_CallbackDomain; + +typedef uint32_t CUpti_CallbackId; + +#define CUPTI_CBID_RESOURCE_CONTEXT_CREATED 1 +#define CUPTI_CBID_RESOURCE_MODULE_LOADED 4 + +typedef void (*CUpti_CallbackFunc)(void *userdata, + CUpti_CallbackDomain domain, + CUpti_CallbackId cbid, + const void *cbdata); + +typedef void (*CUpti_BufferRequestFunc)(uint8_t **buffer, size_t *size, + size_t *maxNumRecords); +typedef void (*CUpti_BufferCompletedFunc)(CUcontext ctx, uint32_t streamId, + uint8_t *buffer, size_t size, + size_t validSize); + +typedef uint32_t CUpti_ActivityKind; +#define CUPTI_ACTIVITY_KIND_KERNEL 3 +#define CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL 10 +#define CUPTI_ACTIVITY_KIND_GRAPH_TRACE 34 + +typedef struct { + uint32_t kind; +} CUpti_Activity; + +// CUpti_ActivityKernel5 — 160 bytes, matching cupti_activity.h. +typedef struct { + uint32_t kind; // offset 0 + uint8_t _pad1[12]; // offset 4 + uint64_t start; // offset 16 + uint64_t end; // offset 24 + uint64_t completed; // offset 32 + uint32_t deviceId; // offset 40 + uint32_t contextId; // offset 44 + uint32_t streamId; // offset 48 + uint8_t _pad2[40]; // offset 52 + uint32_t correlationId; // offset 92 + int64_t gridId; // offset 96 + const char *name; // offset 104 + uint64_t _reserved0; // offset 112 + uint64_t queued; // offset 120 + uint64_t submitted; // offset 128 + uint8_t _pad3[8]; // offset 136 + uint64_t graphNodeId; // offset 144 + uint32_t shmemLimitCfg; // offset 152 + uint32_t graphId; // offset 156 +} __attribute__((aligned(8))) CUpti_ActivityKernel5; + +// CUpti_ActivityGraphTrace — 64 bytes.
+typedef struct { + uint32_t kind; + uint8_t _pad[60]; +} __attribute__((aligned(8))) CUpti_ActivityGraphTrace; + +// Resource callback data. +typedef struct { + CUcontext context; + void *resourceDescriptor; +} CUpti_ResourceData; + +typedef struct { + const char *pCubin; + size_t cubinSize; +} CUpti_ModuleResourceData; + +// cuptiGetCubinCrc params. +typedef struct { + const char *cubin; + size_t cubinSize; + uint64_t cubinCrc; +} CUpti_GetCubinCrcParams; + +// cuptiGetSassToSourceCorrelation params. +typedef struct { + uint64_t pcOffset; + const char *functionName; + uint32_t lineNumber; + char *fileName; + char *dirName; +} CUpti_GetSassToSourceCorrelationParams; + +// cuptiActivitySetAttribute placeholder. +typedef uint32_t CUpti_ActivityAttribute; + +#endif // MOCK_CUPTI_H diff --git a/test/distro-qemu/mock-cupti-headers/cupti_pcsampling.h b/test/distro-qemu/mock-cupti-headers/cupti_pcsampling.h new file mode 100644 index 000000000..0662da64f --- /dev/null +++ b/test/distro-qemu/mock-cupti-headers/cupti_pcsampling.h @@ -0,0 +1,50 @@ +// Minimal CUPTI PC-sampling type definitions for building mock libraries. 
+#ifndef MOCK_CUPTI_PCSAMPLING_H +#define MOCK_CUPTI_PCSAMPLING_H + +#include <stddef.h> +#include <stdint.h> +#include "cupti.h" + +#define CUPTI_STALL_REASON_STRING_SIZE 128 +#define CUPTI_PC_SAMPLING_MAX_STALL_REASONS 32 + +typedef struct { + uint32_t pcSamplingStallReasonIndex; + uint32_t samples; +} CUpti_PCSamplingStallReason; + +typedef struct { + size_t size; + uint64_t cubinCrc; + uint64_t pcOffset; + uint32_t functionIndex; + char *functionName; + uint32_t stallReasonCount; + CUpti_PCSamplingStallReason stallReason[CUPTI_PC_SAMPLING_MAX_STALL_REASONS]; +} CUpti_PCSamplingPCData; + +typedef struct { + uint32_t collectNumPcs; + uint32_t totalNumPcs; + uint32_t remainingNumPcs; + uint64_t totalSamples; + CUpti_PCSamplingPCData *pPcData; +} CUpti_PCSamplingData; + +typedef struct { CUpti_PCSamplingData *pcSamplingData; } CUpti_PCSamplingGetDataParams; +typedef struct { void *dummy; } CUpti_PCSamplingEnableParams; +typedef struct { void *dummy; } CUpti_PCSamplingDisableParams; +typedef struct { void *dummy; } CUpti_PCSamplingStartParams; +typedef struct { void *dummy; } CUpti_PCSamplingStopParams; +typedef struct { void *dummy; } CUpti_PCSamplingConfigurationInfoParams; +typedef struct { + uint32_t *numStallReasons; +} CUpti_PCSamplingGetNumStallReasonsParams; +typedef struct { + size_t numStallReasons; + char (*stallReasons)[CUPTI_STALL_REASON_STRING_SIZE]; + uint32_t *stallReasonIndex; +} CUpti_PCSamplingGetStallReasonsParams; + +#endif // MOCK_CUPTI_PCSAMPLING_H