diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index a20c589e0..8cc89cb3d 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -11,7 +11,7 @@ on: jobs: analyze: name: Analyze Go (${{ matrix.target_arch }}) - if: ${{ github.actor != 'dependabot[bot]' }} + if: ${{ github.actor != 'dependabot[bot]' && github.repository == 'open-telemetry/opentelemetry-ebpf-profiler' }} runs-on: ubuntu-24.04 strategy: matrix: diff --git a/.github/workflows/env/action.yml b/.github/workflows/env/action.yml index 3b15d1408..9ca510ea2 100644 --- a/.github/workflows/env/action.yml +++ b/.github/workflows/env/action.yml @@ -30,12 +30,6 @@ runs: sudo apt-get install -y curl unzip gcc-aarch64-linux-gnu \ libc6-arm64-cross qemu-user-binfmt libc6:arm64 \ musl-dev:amd64 musl-dev:arm64 musl-tools binutils-aarch64-linux-gnu - - name: Set up Go - uses: actions/setup-go@v5 - with: - go-version-file: go.mod - cache-dependency-path: go.sum - id: go - name: Install Rust uses: dtolnay/rust-toolchain@stable with: diff --git a/.github/workflows/fossa.yml b/.github/workflows/fossa.yml index 3beaa09b7..00d9703d5 100644 --- a/.github/workflows/fossa.yml +++ b/.github/workflows/fossa.yml @@ -11,6 +11,7 @@ permissions: jobs: fossa: runs-on: ubuntu-latest + if: github.repository == 'open-telemetry/opentelemetry-ebpf-profiler' steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 diff --git a/.github/workflows/push-docker-image.yml b/.github/workflows/push-docker-image.yml index 2e47aca8d..6a3aa750a 100644 --- a/.github/workflows/push-docker-image.yml +++ b/.github/workflows/push-docker-image.yml @@ -9,6 +9,7 @@ on: jobs: build-and-push: runs-on: ubuntu-latest + if: github.repository == 'open-telemetry/opentelemetry-ebpf-profiler' steps: - name: Checkout code uses: actions/checkout@v4 diff --git a/.github/workflows/unit-test-on-pull-request.yml b/.github/workflows/unit-test-on-pull-request.yml index f52a57c8c..45aeb5f11 100644 --- 
a/.github/workflows/unit-test-on-pull-request.yml +++ b/.github/workflows/unit-test-on-pull-request.yml @@ -15,6 +15,13 @@ jobs: uses: actions/checkout@v4 - name: Set up environment uses: ./.github/workflows/env + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version-file: go.mod + check-latest: true + cache-dependency-path: go.sum + id: go - name: Check for changes in licenses of dependencies run: | make legal @@ -34,6 +41,13 @@ jobs: uses: actions/checkout@v4 - name: Set up environment uses: ./.github/workflows/env + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version-file: go.mod + check-latest: true + cache-dependency-path: go.sum + id: go - name: Get linter version id: linter-version run: (echo -n "version="; make linter-version) >> "$GITHUB_OUTPUT" @@ -61,6 +75,13 @@ jobs: uses: actions/checkout@v4 - name: Set up environment uses: ./.github/workflows/env + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version-file: go.mod + check-latest: true + cache-dependency-path: go.sum + id: go - name: Cache coredump modules uses: actions/cache@v4 with: @@ -98,13 +119,21 @@ jobs: build-integration-test-binaries: - name: Build integration test binaries (${{ matrix.target_arch }}) + name: Build integration test binaries (${{ matrix.target_arch }}-${{ matrix.go_version}}) runs-on: ubuntu-24.04 timeout-minutes: 10 strategy: matrix: target_arch: [amd64, arm64] + go_version: [1.23, 1.24] steps: + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: ${{ matrix.go_version }} + check-latest: true + cache-dependency-path: go.sum + id: go - name: Clone code uses: actions/checkout@v4 - name: Set up environment @@ -114,16 +143,17 @@ jobs: - name: Upload integration test binaries uses: actions/upload-artifact@v4 with: - name: integration-test-binaries-${{ matrix.target_arch }} + name: integration-test-binaries-${{ matrix.target_arch }}-${{ matrix.go_version}} path: support/*.test integration-tests: - name: Integration tests (v${{ 
matrix.kernel }} ${{ matrix.target_arch }}) + name: Integration tests (v${{ matrix.kernel }} ${{ matrix.target_arch }} ${{ matrix.go_version}}) runs-on: ubuntu-24.04 needs: build-integration-test-binaries timeout-minutes: 10 strategy: matrix: + go_version: [1.23, 1.24] include: # List of available kernels here: # https://github.com/cilium/ci-kernels/pkgs/container/ci-kernels/versions?filters%5Bversion_type%5D=tagged @@ -147,6 +177,13 @@ jobs: steps: - name: Clone code uses: actions/checkout@v4 + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: ${{ matrix.go_version }} + check-latest: true + cache-dependency-path: go.sum + id: go - name: Install dependencies run: | sudo apt-get update -y @@ -159,7 +196,7 @@ jobs: sudo mv ~/go/bin/bluebox /usr/local/bin/. - name: Fetch integration test binaries uses: actions/download-artifact@v4 - with: { name: "integration-test-binaries-${{ matrix.target_arch }}" } + with: { name: "integration-test-binaries-${{ matrix.target_arch }}-${{ matrix.go_version}}" } - name: Fetch precompiled kernel run: | install -d ci-kernels diff --git a/.golangci.yml b/.golangci.yml index a42466039..ec90a4a9e 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -91,8 +91,6 @@ linters: - wastedassign - wsl - wrapcheck - # the following linters are deprecated - - exportloopref # we don't want to change code to Go 1.22+ yet - intrange - copyloopvar diff --git a/LICENSES/go.opentelemetry.io/auto/sdk/LICENSE b/LICENSES/go.opentelemetry.io/auto/sdk/LICENSE new file mode 100644 index 000000000..261eeb9e9 --- /dev/null +++ b/LICENSES/go.opentelemetry.io/auto/sdk/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. 
+ + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/Makefile b/Makefile index e655faa7d..c3981968b 100644 --- a/Makefile +++ b/Makefile @@ -46,7 +46,7 @@ LDFLAGS := -X go.opentelemetry.io/ebpf-profiler/vc.version=$(VERSION) \ -extldflags=-static GO_TAGS := osusergo,netgo -EBPF_FLAGS := +EBPF_FLAGS := GO_FLAGS := -buildvcs=false -ldflags="$(LDFLAGS)" @@ -127,9 +127,11 @@ test-deps: ($(MAKE) -C "$(testdata_dir)") || exit ; \ ) -TEST_INTEGRATION_BINARY_DIRS := tracer processmanager/ebpf support +TEST_INTEGRATION_BINARY_DIRS := tracer processmanager/ebpf support go_labels integration-test-binaries: generate ebpf +# Call it a ".test" even though it isn't to get included into bluebox initramfs + go build -o ./support/go_labels_canary.test ./go_labels $(foreach test_name, $(TEST_INTEGRATION_BINARY_DIRS), \ (go test -ldflags='-extldflags=-static' -trimpath -c \ -tags $(GO_TAGS),static_build,integration \ diff --git a/README.md b/README.md index faea58ea4..03499ec90 100644 --- a/README.md +++ b/README.md @@ -31,8 +31,6 @@ eBPF. optimizations and offer a higher precision of function call chains. ## Building -## Quick Start -If you'd like to quickly test the agent, you can skip to the ["Visualizing data locally"](https://github.com/open-telemetry/opentelemetry-ebpf-profiler?tab=readme-ov-file#visualizing-data-locally) section and launch devfiler. From there, follow the download links for prebuilt agent binaries. ## Platform Requirements The agent can be built with the provided make targets. Docker is required for containerized builds, and both amd64 and arm64 architectures are supported. @@ -77,61 +75,6 @@ of the recently released OTel profiling [signal](https://github.com/open-telemet The agent loads the eBPF program and its maps, starts unwinding and reports captured traces to the backend. -## Visualizing data locally - -We created a desktop application called "devfiler" that allows visualizing the -profiling agent's output locally, making it very convenient for development use. 
-devfiler spins up a local server that listens on `0.0.0.0:11000`. - -![Screenshot of devfiler UI](./doc/devfiler.png) - -To run it, simply download and unpack the archive from the following URL: - -https://upload.elastic.co/d/05d06f7a9a1898bc39e90b74647a1fb3ae19db7497d88e22aea27f47b8565dde - -Authentication token: `abe24ec533283049` - - -The archive contains a build for each of the following platforms: - -- macOS (Intel) -- macOS (Apple Silicon) -- Linux AppImage (x86_64) -- Linux AppImage (aarch64) - -> [!IMPORTANT] -> -> The macOS application isn't properly signed with an Apple developer certificate: macOS will -> complain about the application being corrupted on start. To work around that, simply run the following -> command after downloading the archive: -> -> ``` -> xattr -d com.apple.quarantine ~/Downloads/devfiler.app.zip -> ``` -> -> If you did this correctly, the application should run just fine after unpacking the ZIP. - -> [!NOTE] -> devfiler is currently in an experimental preview stage. - -### macOS - -This build of devfiler is currently not signed with a globally trusted Apple -developer ID, but with a developer certificate. If you simply double-click the -application, you'll run into an error. Instead of opening it with a double -click, simply do a **right-click** on `devfiler.app`, then choose "Open". If you -go this route, you'll instead be presented with the option to run it anyway. - -### Linux - -The AppImages in the archive should run on any Linux distribution with a -reasonably modern glibc and libgl installation. 
To run the application, simply -extract the archive and then do: - -```console -./devfiler-appimage-$(uname -m).AppImage -``` - ## Agent internals The host agent is a Go application that is deployed to all machines customers diff --git a/cli_flags.go b/cli_flags.go index 6ace06145..fbc770127 100644 --- a/cli_flags.go +++ b/cli_flags.go @@ -26,6 +26,7 @@ const ( defaultProbabilisticInterval = 1 * time.Minute defaultArgSendErrorFrames = false defaultOffCPUThreshold = 0 + defaultEnvVarsValue = "" // This is the X in 2^(n + x) where n is the default hardcoded map size value defaultArgMapScaleFactor = 0 @@ -67,6 +68,8 @@ var ( "Valid values are in the range [1..%d], and 0 to disable off-cpu profiling."+ "Default is %d.", support.OffCPUThresholdMax, defaultOffCPUThreshold) + envVarsHelp = "Comma separated list of environment variables that will be reported with the" + + "captured profiling samples." ) // Package-scope variable, so that conditionally compiled other components can refer @@ -123,6 +126,8 @@ func parseArgs() (*controller.Config, error) { fs.UintVar(&args.OffCPUThreshold, "off-cpu-threshold", defaultOffCPUThreshold, offCPUThresholdHelp) + fs.StringVar(&args.IncludeEnvVars, "env-vars", defaultEnvVarsValue, envVarsHelp) + fs.Usage = func() { fs.PrintDefaults() } diff --git a/customlabelstest/customlabels_test.go b/customlabelstest/customlabels_test.go new file mode 100644 index 000000000..b4d48d102 --- /dev/null +++ b/customlabelstest/customlabels_test.go @@ -0,0 +1,85 @@ +package customlabelstest + +import ( + "context" + "os/exec" + "regexp" + "testing" + "time" + + "github.com/stretchr/testify/require" + "go.opentelemetry.io/ebpf-profiler/testutils" + tracertypes "go.opentelemetry.io/ebpf-profiler/tracer/types" +) + +func TestNativeCustomLabels(t *testing.T) { + if !testutils.IsRoot() { + t.Skip("root privileges required") + } + + r := &testutils.MockReporter{} + enabledTracers, _ := tracertypes.Parse("all") + ctx, cancel := 
context.WithCancel(context.Background()) + defer cancel() + + traceCh, _ := testutils.StartTracer(ctx, t, enabledTracers, r) + // TODO - change this to `cargo build --release --bin custom-labels-example` + // once we have the Rust workspace from upstream. + cmd := exec.Command("cargo", "build", "--release", + "--manifest-path", "./rust-crates/custom-labels-example/Cargo.toml") + out, err := cmd.CombinedOutput() + require.NoError(t, err, string(out)) + + errCh := make(chan error, 1) + + cmd = exec.CommandContext(ctx, + "./rust-crates/custom-labels-example/target/release/custom-labels-example") + err = cmd.Start() + require.NoError(t, err) + + go func() { + err := cmd.Wait() + errCh <- err + }() + + stopCh := time.After(10 * time.Second) + + re := regexp.MustCompile(`^[a-zA-Z0-9]{16}$`) + good := false +Loop: + for { + select { + case trace, ok := <-traceCh: + if !ok { + break Loop + } + t.Logf("got a trace %s", trace.Comm) + if len(trace.CustomLabels) > 0 { + var gotL1, gotL2 bool + for k, v := range trace.CustomLabels { + switch k { + case "l1": + gotL1 = true + require.True(t, re.MatchString(v)) + t.Logf("got l1, value is %s", v) + case "l2": + gotL2 = true + require.True(t, re.MatchString(v)) + t.Logf("got l2, value is %s", v) + default: + require.Failf(t, "fail", "got unexpected label: %s=%s", k, v) + } + } + if gotL1 && gotL2 { + good = true + break Loop + } + } + case err := <-errCh: + require.Failf(t, "fail", "Failed to run custom-labels-example, err = %v", err) + case <-stopCh: + require.Fail(t, "fail", "Failed to get labels after ten seconds") + } + } + require.True(t, good) +} diff --git a/doc/devfiler.png b/doc/devfiler.png deleted file mode 100644 index e91a30e86..000000000 Binary files a/doc/devfiler.png and /dev/null differ diff --git a/go.mod b/go.mod index a13f6f27c..8ed40ee08 100644 --- a/go.mod +++ b/go.mod @@ -27,12 +27,12 @@ require ( go.opentelemetry.io/collector/receiver v0.116.0 go.opentelemetry.io/collector/receiver/receivertest v0.116.0 
go.opentelemetry.io/collector/receiver/xreceiver v0.116.0 - go.opentelemetry.io/otel v1.32.0 - go.opentelemetry.io/otel/metric v1.32.0 + go.opentelemetry.io/otel v1.35.0 + go.opentelemetry.io/otel/metric v1.35.0 golang.org/x/arch v0.10.0 golang.org/x/exp v0.0.0-20240909161429-701f63a606c0 - golang.org/x/sync v0.10.0 - golang.org/x/sys v0.29.0 + golang.org/x/sync v0.12.0 + golang.org/x/sys v0.31.0 google.golang.org/grpc v1.69.2 ) @@ -56,7 +56,7 @@ require ( github.com/go-logr/logr v1.4.2 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/gogo/protobuf v1.3.2 // indirect - github.com/google/go-cmp v0.6.0 // indirect + github.com/google/go-cmp v0.7.0 // indirect github.com/josharian/native v1.1.0 // indirect github.com/json-iterator/go v1.1.12 // indirect github.com/klauspost/cpuid/v2 v2.2.8 // indirect @@ -65,6 +65,7 @@ require ( github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect + go.opentelemetry.io/auto/sdk v1.1.0 // indirect go.opentelemetry.io/collector/component/componenttest v0.116.0 // indirect go.opentelemetry.io/collector/config/configtelemetry v0.116.0 // indirect go.opentelemetry.io/collector/consumer v1.22.0 // indirect @@ -72,11 +73,11 @@ require ( go.opentelemetry.io/collector/pipeline v0.116.0 // indirect go.opentelemetry.io/otel/sdk v1.32.0 // indirect go.opentelemetry.io/otel/sdk/metric v1.32.0 // indirect - go.opentelemetry.io/otel/trace v1.32.0 // indirect + go.opentelemetry.io/otel/trace v1.35.0 // indirect go.uber.org/multierr v1.11.0 // indirect go.uber.org/zap v1.27.0 // indirect - golang.org/x/net v0.33.0 // indirect - golang.org/x/text v0.21.0 // indirect + golang.org/x/net v0.37.0 // indirect + golang.org/x/text v0.23.0 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20250102185135-69823020774d // indirect google.golang.org/protobuf v1.36.1 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect 
diff --git a/go.sum b/go.sum index ea03f9710..75db7bf02 100644 --- a/go.sum +++ b/go.sum @@ -56,8 +56,8 @@ github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= -github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= -github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= @@ -94,8 +94,8 @@ github.com/peterbourgon/ff/v3 v3.4.0 h1:QBvM/rizZM1cB0p0lGMdmR7HxZeI/ZrBWB4DqLkM github.com/peterbourgon/ff/v3 v3.4.0/go.mod h1:zjJVUhx+twciwfDl0zBcFzl4dW8axCRyXE/eKY9RztQ= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M= -github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA= +github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= +github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/stretchr/objx v0.1.0/go.mod 
h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= @@ -111,6 +111,8 @@ github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ= github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0= github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA= +go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= +go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= go.opentelemetry.io/collector/component v0.116.0 h1:SQE1YeVfYCN7bw1n4hknUwJE5U/1qJL552sDhAdSlaA= go.opentelemetry.io/collector/component v0.116.0/go.mod h1:MYgXFZWDTq0uPgF1mkLSFibtpNqksRVAOrmihckOQEs= go.opentelemetry.io/collector/component/componenttest v0.116.0 h1:UIcnx4Rrs/oDRYSAZNHRMUiYs2FBlwgV5Nc0oMYfR6A= @@ -139,16 +141,16 @@ go.opentelemetry.io/collector/receiver/receivertest v0.116.0 h1:ZF4QVcots0OUiutb go.opentelemetry.io/collector/receiver/receivertest v0.116.0/go.mod h1:7GGvtHhW3o6457/wGtSWXJtCtlW6VGFUZSlf6wboNTw= go.opentelemetry.io/collector/receiver/xreceiver v0.116.0 h1:Kc+ixqgMjU2sHhzNrFn5TttVNiJlJwTLL3sQrM9uH6s= go.opentelemetry.io/collector/receiver/xreceiver v0.116.0/go.mod h1:H2YGSNFoMbWMIDvB8tzkReHSVqvogihjtet+ppHfYv8= -go.opentelemetry.io/otel v1.32.0 h1:WnBN+Xjcteh0zdk01SVqV55d/m62NJLJdIyb4y/WO5U= -go.opentelemetry.io/otel v1.32.0/go.mod h1:00DCVSB0RQcnzlwyTfqtxSm+DRr9hpYrHjNGiBHVQIg= -go.opentelemetry.io/otel/metric v1.32.0 h1:xV2umtmNcThh2/a/aCP+h64Xx5wsj8qqnkYZktzNa0M= -go.opentelemetry.io/otel/metric v1.32.0/go.mod h1:jH7CIbbK6SH2V2wE16W05BHCtIDzauciCRLoc/SyMv8= +go.opentelemetry.io/otel v1.35.0 h1:xKWKPxrxB6OtMCbmMY021CqC45J+3Onta9MqjhnusiQ= +go.opentelemetry.io/otel v1.35.0/go.mod h1:UEqy8Zp11hpkUrL73gSlELM0DupHoiq72dR+Zqel/+Y= +go.opentelemetry.io/otel/metric v1.35.0 h1:0znxYu2SNyuMSQT4Y9WDWej0VpcsxkuklLa4/siN90M= +go.opentelemetry.io/otel/metric v1.35.0/go.mod 
h1:nKVFgxBZ2fReX6IlyW28MgZojkoAkJGaE8CpgeAU3oE= go.opentelemetry.io/otel/sdk v1.32.0 h1:RNxepc9vK59A8XsgZQouW8ue8Gkb4jpWtJm9ge5lEG4= go.opentelemetry.io/otel/sdk v1.32.0/go.mod h1:LqgegDBjKMmb2GC6/PrTnteJG39I8/vJCAP9LlJXEjU= go.opentelemetry.io/otel/sdk/metric v1.32.0 h1:rZvFnvmvawYb0alrYkjraqJq0Z4ZUJAiyYCU9snn1CU= go.opentelemetry.io/otel/sdk/metric v1.32.0/go.mod h1:PWeZlq0zt9YkYAp3gjKZ0eicRYvOh1Gd+X99x6GHpCQ= -go.opentelemetry.io/otel/trace v1.32.0 h1:WIC9mYrXf8TmY/EXuULKc8hR17vE+Hjv2cssQDe03fM= -go.opentelemetry.io/otel/trace v1.32.0/go.mod h1:+i4rkvCraA+tG6AzwloGaCtkx53Fa+L+V8e9a7YvhT8= +go.opentelemetry.io/otel/trace v1.35.0 h1:dPpEfJu1sDIqruz7BHFG3c7528f6ddfSWfFDVt/xgMs= +go.opentelemetry.io/otel/trace v1.35.0/go.mod h1:WUk7DtFp1Aw2MkvqGdwiXYDZZNvA/1J8o6xRXLrIkyc= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= @@ -168,24 +170,24 @@ golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I= -golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4= +golang.org/x/net v0.37.0 h1:1zLorHbz+LYj7MQlSf1+2tPIIgibq2eL5xkrGk6f+2c= +golang.org/x/net v0.37.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync 
v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ= -golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.12.0 h1:MHc5BpPuC30uJk597Ri8TV3CNZcTLu6B6z4lJy+g6Jw= +golang.org/x/sync v0.12.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.29.0 h1:TPYlXGxvx1MGTn2GiZDhnjPA9wZzZeGKHHmKhHYvgaU= -golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.31.0 h1:ioabZlmFYtWhL+TRYpcnNlLwhyxaM9kWTDEmfnprqik= +golang.org/x/sys v0.31.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo= -golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ= +golang.org/x/text v0.23.0 h1:D71I7dUrlY+VX0gQShAThNGHFxZ13dGLBHQLVl1mJlY= +golang.org/x/text v0.23.0/go.mod h1:/BLNzu4aZCJ1+kcD0DNRotWKage4q2rGVAg4o22unh4= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod 
h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= diff --git a/go_labels/main.go b/go_labels/main.go new file mode 100644 index 000000000..b574268ef --- /dev/null +++ b/go_labels/main.go @@ -0,0 +1,37 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package main + +import ( + "context" + "fmt" + "math/rand" + "runtime/pprof" + "time" +) + +//nolint:gosec +func randomString(n int) string { + letters := []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789") + s := make([]rune, n) + for i := range s { + s[i] = letters[rand.Intn(len(letters))] + } + return string(s) +} + +// This is a normal main program that when go build will be statically linked, this is required +// to work with qemu/bluebox testing harness. A statically linked go test built binary doesn't +// work with the go labels extractor ebpf program, not sure yet if this is a bug. +func main() { + labels := pprof.Labels("l1", randomString(16), "l2", randomString(16), "l3", randomString(16)) + lastUpdate := time.Now() + pprof.Do(context.TODO(), labels, func(context.Context) { + //nolint:revive + for time.Since(lastUpdate) < 10*time.Second { + // CPU go burr on purpose. 
+ } + }) + fmt.Println("PASS") +} diff --git a/go_labels/main_test.go b/go_labels/main_test.go new file mode 100644 index 000000000..c6f0313dd --- /dev/null +++ b/go_labels/main_test.go @@ -0,0 +1,50 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 +package main + +import ( + "context" + "os" + "os/exec" + "testing" + + "github.com/stretchr/testify/require" + "go.opentelemetry.io/ebpf-profiler/testutils" + tracertypes "go.opentelemetry.io/ebpf-profiler/tracer/types" +) + +func TestGoCustomLabels(t *testing.T) { + if !testutils.IsRoot() { + t.Skip("root privileges required") + } + + ctx, cancel := context.WithCancel(context.Background()) + + r := &testutils.MockReporter{} + enabledTracers, _ := tracertypes.Parse("") + enabledTracers.Enable(tracertypes.GoLabels) + traceCh, _ := testutils.StartTracer(ctx, t, enabledTracers, r) + + // Use a separate exe for getting labels as the bpf code doesn't seem to work with + // go test static binaries at the moment, not clear if that's a problem with the bpf + // code or a bug/fact of life for static go binaries and getting g from TLS. + cmd := exec.Command("./go_labels_canary.test") + err := cmd.Start() + require.NoError(t, err) + + // Wait 1 second for traces to arrive. 
+ for trace := range traceCh { + if trace == nil { + continue + } + if len(trace.CustomLabels) > 0 { + for k, v := range trace.CustomLabels { + t.Logf("Custom label: %s=%s", k, v) + } + break + } + } + cancel() + _ = cmd.Process.Signal(os.Kill) + _ = cmd.Wait() +} diff --git a/host/host.go b/host/host.go index d04c8c381..249e9dbf9 100644 --- a/host/host.go +++ b/host/host.go @@ -60,4 +60,6 @@ type Trace struct { APMTraceID libpf.APMTraceID APMTransactionID libpf.APMTransactionID CPU int + EnvVars map[string]string + CustomLabels map[string]string } diff --git a/internal/controller/config.go b/internal/controller/config.go index 57f108248..cf457100a 100644 --- a/internal/controller/config.go +++ b/internal/controller/config.go @@ -32,15 +32,13 @@ type Config struct { Tracers string VerboseMode bool Version bool - // HostName is the name of the host. - HostName string - // IPAddress is the IP address of the host that sends data to CollAgentAddr. - IPAddress string - OffCPUThreshold uint + OffCPUThreshold uint Reporter reporter.Reporter Fs *flag.FlagSet + + IncludeEnvVars string } const ( diff --git a/internal/controller/controller.go b/internal/controller/controller.go index c696d1e36..7d823352e 100644 --- a/internal/controller/controller.go +++ b/internal/controller/controller.go @@ -3,12 +3,14 @@ package controller // import "go.opentelemetry.io/ebpf-profiler/internal/control import ( "context" "fmt" + "strings" "time" log "github.com/sirupsen/logrus" "github.com/tklauser/numcpus" "go.opentelemetry.io/ebpf-profiler/host" + "go.opentelemetry.io/ebpf-profiler/libpf" "go.opentelemetry.io/ebpf-profiler/metrics" "go.opentelemetry.io/ebpf-profiler/reporter" "go.opentelemetry.io/ebpf-profiler/times" @@ -71,6 +73,15 @@ func (c *Controller) Start(ctx context.Context) error { return fmt.Errorf("failed to start reporter: %w", err) } + envVars := libpf.Set[string]{} + splittedEnvVars := strings.Split(c.config.IncludeEnvVars, ",") + for _, envVar := range splittedEnvVars { + 
envVar = strings.TrimSpace(envVar) + if envVar != "" { + envVars[envVar] = libpf.Void{} + } + } + // Load the eBPF code and map definitions trc, err := tracer.NewTracer(ctx, &tracer.Config{ Reporter: c.reporter, @@ -85,6 +96,7 @@ func (c *Controller) Start(ctx context.Context) error { ProbabilisticInterval: c.config.ProbabilisticInterval, ProbabilisticThreshold: c.config.ProbabilisticThreshold, OffCPUThreshold: uint32(c.config.OffCPUThreshold), + IncludeEnvVars: envVars, }) if err != nil { return fmt.Errorf("failed to load eBPF tracer: %w", err) diff --git a/interpreter/customlabels/customlabels.go b/interpreter/customlabels/customlabels.go new file mode 100644 index 000000000..7f9404450 --- /dev/null +++ b/interpreter/customlabels/customlabels.go @@ -0,0 +1,156 @@ +package customlabels // import "go.opentelemetry.io/ebpf-profiler/interpreter/customlabels" + +// #include +// #include "../../support/ebpf/types.h" +import "C" +import ( + "debug/elf" + "errors" + "fmt" + "regexp" + "unsafe" + + "go.opentelemetry.io/ebpf-profiler/interpreter" + "go.opentelemetry.io/ebpf-profiler/libpf" + "go.opentelemetry.io/ebpf-profiler/libpf/pfelf" + "go.opentelemetry.io/ebpf-profiler/remotememory" +) + +const ( + abiVersionExport = "custom_labels_abi_version" + tlsExport = "custom_labels_current_set" +) + +var dsoRegex = regexp.MustCompile(`.*/libcustomlabels.*\.so|.*/customlabels\.node`) + +type data struct { + abiVersionElfVA libpf.Address + tlsAddr libpf.Address + isSharedLibrary bool +} + +var _ interpreter.Data = &data{} + +func roundUp(multiple, value uint64) uint64 { + if multiple == 0 { + return value + } + return (value + multiple - 1) / multiple * multiple +} + +func Loader(_ interpreter.EbpfHandler, info *interpreter.LoaderInfo) (interpreter.Data, error) { + ef, err := info.GetELF() + if err != nil { + return nil, err + } + + abiVersionSym, err := ef.LookupSymbol(abiVersionExport) + if err != nil { + if errors.Is(err, pfelf.ErrSymbolNotFound) { + return nil, nil + } + + 
return nil, err + } + + if abiVersionSym.Size != 4 { + return nil, fmt.Errorf("abi version export has wrong size %d", abiVersionSym.Size) + } + + // If this is the libcustomlabels.so library, we are using + // global-dynamic TLS model and have to look up the TLS descriptor. + // Otherwise, assume we're the main binary and just look up the + // symbol. + isSharedLibrary := dsoRegex.MatchString(info.FileName()) + var tlsAddr libpf.Address + if isSharedLibrary { + // Resolve thread info TLS export. + tlsDescs, err := ef.TLSDescriptors() + if err != nil { + return nil, errors.New("failed to extract TLS descriptors") + } + var ok bool + tlsAddr, ok = tlsDescs[tlsExport] + if !ok { + return nil, errors.New("failed to locate TLS descriptor for custom labels") + } + } else { + tlsSym, err := ef.LookupSymbol(tlsExport) + if err != nil { + return nil, err + } + if ef.Machine == elf.EM_AARCH64 { + tlsAddr = libpf.Address(tlsSym.Address + 16) + } else if ef.Machine == elf.EM_X86_64 { + // Symbol addresses are relative to the start of the + // thread-local storage image, but the thread pointer points to the _end_ + // of the image. So we need to find the size of the image in order to know where the + // beginning is. + // + // The image is just .tdata followed by .tbss, + // but we also have to respect the alignment. 
+ tbss, err := ef.Tbss() + if err != nil { + return nil, err + } + tdata, err := ef.Tdata() + var tdataSize uint64 + if err != nil { + // No Tdata is ok, it's the same as size 0 + if err != pfelf.ErrNoTdata { + return nil, err + } + } else { + tdataSize = tdata.Size + } + imageSize := roundUp(tbss.Addralign, tdataSize) + tbss.Size + tlsAddr = libpf.Address(int64(tlsSym.Address) - int64(imageSize)) + } else { + return nil, fmt.Errorf("unrecognized machine: %s", ef.Machine.String()) + } + } + + d := data{ + abiVersionElfVA: libpf.Address(abiVersionSym.Address), + tlsAddr: tlsAddr, + isSharedLibrary: isSharedLibrary, + } + return &d, nil +} + +type instance struct { + interpreter.InstanceStubs +} + +func (d data) Attach(ebpf interpreter.EbpfHandler, pid libpf.PID, + bias libpf.Address, rm remotememory.RemoteMemory) (interpreter.Instance, error) { + abiVersion, err := rm.Uint32Checked(bias + d.abiVersionElfVA) + if err != nil { + return nil, fmt.Errorf("failed to read custom labels ABI version: %w", err) + } + + if abiVersion != 1 { + return nil, fmt.Errorf("unsupported custom labels ABI version: %d"+ + " (only 1 is supported)", abiVersion) + } + + var tlsOffset uint64 + if d.isSharedLibrary { + // Read TLS offset from the TLS descriptor + tlsOffset = rm.Uint64(bias + d.tlsAddr + 8) + } else { + // We're in the main executable: TLS offset is known statically. 
+ tlsOffset = uint64(d.tlsAddr) + } + + procInfo := C.NativeCustomLabelsProcInfo{tls_offset: C.u64(tlsOffset)} + if err := ebpf.UpdateProcData(libpf.CustomLabels, pid, unsafe.Pointer(&procInfo)); err != nil { + return nil, err + } + + return &instance{}, nil +} + +func (i *instance) Detach(ebpf interpreter.EbpfHandler, pid libpf.PID) error { + return ebpf.DeleteProcData(libpf.CustomLabels, pid) +} diff --git a/interpreter/golang/golang.go b/interpreter/golang/golang.go new file mode 100644 index 000000000..97064b489 --- /dev/null +++ b/interpreter/golang/golang.go @@ -0,0 +1,87 @@ +package golang // import "go.opentelemetry.io/ebpf-profiler/interpreter/golang" + +import ( + "errors" + "fmt" + "regexp" + "unsafe" + + log "github.com/sirupsen/logrus" + + "go.opentelemetry.io/ebpf-profiler/interpreter" + "go.opentelemetry.io/ebpf-profiler/libpf" + "go.opentelemetry.io/ebpf-profiler/remotememory" +) + +// #include +// #include "../../support/ebpf/types.h" +import "C" + +var goMajorMinorRegex = regexp.MustCompile(`^go\d+\.\d+`) + +type data struct { + goVersion string + offsets C.GoCustomLabelsOffsets + interpreter.InstanceStubs +} + +func (d data) Attach(ebpf interpreter.EbpfHandler, pid libpf.PID, + _ libpf.Address, _ remotememory.RemoteMemory) (interpreter.Instance, error) { + if err := ebpf.UpdateProcData(libpf.Go, pid, unsafe.Pointer(&d.offsets)); err != nil { + return nil, err + } + + return &d, nil +} + +func (d data) Detach(ebpf interpreter.EbpfHandler, pid libpf.PID) error { + return ebpf.DeleteProcData(libpf.Go, pid) +} + +func Loader(_ interpreter.EbpfHandler, info *interpreter.LoaderInfo) (interpreter.Data, error) { + // Note: So far we have observed that offsets are always the same for any + // go1.mm.yy with fixed mm and any value of yy. That is; the major and minor + // version determine the offsets, while the patch version has no effect. + // + // If this should change in some future Go patch release, we'll need to change + // this function. 
+ + file, err := info.GetELF() + if err != nil { + return nil, err + } + goVersion, err := ReadGoVersion(file) + if errors.Is(err, ErrNoGoVersion) { + log.Debugf("file %s is not a Go binary", info.FileName()) + return nil, nil + } + if err != nil { + return nil, err + } + log.Debugf("file %s detected as go version %s", info.FileName(), goVersion) + majorMinor := goMajorMinorRegex.FindString(goVersion) + if majorMinor == "" { + return nil, fmt.Errorf("failed to parse go version %s into goM.mm", goVersion) + } + + offsets, ok := allOffsets[majorMinor] + if !ok { + // Info instead of warn: this is often going to be fine, + // as the offsets tend not to change every release cycle. + // + // TODO: Reword the message if we upstream this, + // since it mentions `parca-agent` by name. + log.Infof("version %s unknown; using offsets for latest known Go version %s."+ + "If Go traceID integration and other custom labels support is buggy,"+ + " try upgrading parca-agent to the latest version.", goVersion, defaultVersion) + return data{ + goVersion: goVersion, + offsets: allOffsets[defaultVersion], + }, nil + } + + return data{ + goVersion: goVersion, + offsets: offsets, + }, nil +} diff --git a/interpreter/golang/readelf.go b/interpreter/golang/readelf.go new file mode 100644 index 000000000..6645ff9cb --- /dev/null +++ b/interpreter/golang/readelf.go @@ -0,0 +1,131 @@ +package golang // import "go.opentelemetry.io/ebpf-profiler/interpreter/golang" + +import ( + "bytes" + "debug/elf" + "encoding/binary" + "errors" + "io" + + "go.opentelemetry.io/ebpf-profiler/libpf/pfelf" +) + +func getVersionSection(f *pfelf.File) io.ReaderAt { + if sec := f.Section(".go.buildinfo"); sec != nil { + return sec + } + for _, seg := range f.Progs { + if seg.Type == elf.PT_LOAD && seg.Flags&(elf.PF_X|elf.PF_W) == elf.PF_W { + return &seg + } + } + return nil +} + +var ErrNoGoVersion = errors.New("go version not found") +var buildInfoMagic = []byte("\xff Go buildinf:") + +// readBuildInfo reads 
build info, failing if it's not +// in the first 1 MiB of the given stream. +func readBuildInfo(s io.ReaderAt) ([]byte, error) { + const ( + buildInfoAlign = 16 + buildInfoSize = 32 + chunk = 1 << 20 + ) + buf := make([]byte, chunk) + n, err := s.ReadAt(buf, 0) + if err != nil && !errors.Is(err, io.EOF) { + return nil, err + } + data := buf[:n] + for { + i := bytes.Index(data, buildInfoMagic) + if i < 0 || len(data)-i < buildInfoSize { + break + } + if i%buildInfoAlign == 0 && len(data)-i >= buildInfoSize { + data = data[i:] + return data, nil + } + data = data[(i+buildInfoAlign-1)&^(buildInfoAlign-1):] + } + return nil, ErrNoGoVersion +} + +func decodeString(data []byte) string { + u, n := binary.Uvarint(data) + if n <= 0 || u > uint64(len(data)-n) { + return "" + } + return string(data[n : uint64(n)+u]) +} + +// readString returns the string at address addr in the executable x. +func readString(x *pfelf.File, ptrSize int, + readPtr func([]byte) uint64, addr uint64) (string, error) { + buf := make([]byte, 2*ptrSize) + n, err := x.ReadAt(buf, int64(addr)) + if err != nil { + return "", err + } + if n != len(buf) { + return "", io.EOF + } + dataAddr := readPtr(buf) + dataLen := readPtr(buf[ptrSize:]) + const maxSize = 64 // implausible that a Go version string is bigger than this + if dataLen > maxSize { + return "", ErrNoGoVersion + } + buf = make([]byte, dataLen) + n, err = x.ReadAt(buf, int64(dataAddr)) + if err != nil { + return "", err + } + if n != len(buf) { + return "", io.EOF + } + return string(buf), nil +} + +// ReadGoVersion returns the version of the Go toolchain that build the binary +// (for example, "go1.19.2"). +// +// It is guaranteed not to consume more than 1 MiB of memory. 
+func ReadGoVersion(f *pfelf.File) (string, error) { + vs := getVersionSection(f) + if vs == nil { + return "", ErrNoGoVersion + } + data, err := readBuildInfo(vs) + if err != nil { + return "", err + } + ptrSize := int(data[14]) + var vers string + if data[15]&2 != 0 { + vers = decodeString(data[32:]) + } else { + bigEndian := data[15] != 0 + var bo binary.ByteOrder + if bigEndian { + bo = binary.BigEndian + } else { + bo = binary.LittleEndian + } + var readPtr func([]byte) uint64 + if ptrSize == 4 { + readPtr = func(b []byte) uint64 { return uint64(bo.Uint32(b)) } + } else if ptrSize == 8 { + readPtr = bo.Uint64 + } else { + return "", ErrNoGoVersion + } + vers, err = readString(f, ptrSize, readPtr, readPtr(data[16:])) + if err != nil { + return "", err + } + } + return vers, nil +} diff --git a/interpreter/golang/runtime_data.go b/interpreter/golang/runtime_data.go new file mode 100644 index 000000000..3772dbbd1 --- /dev/null +++ b/interpreter/golang/runtime_data.go @@ -0,0 +1,121 @@ +package golang // import "go.opentelemetry.io/ebpf-profiler/interpreter/golang" + +// #include +// #include "../../support/ebpf/types.h" +import "C" + +// defaultVersion is used if the go binary has an unrecognized major+minor version. +// Consider bumping this whenever a new version of Go is released. 
+var defaultVersion = "go1.23" + +var allOffsets = map[string]C.GoCustomLabelsOffsets{ + "go1.11": { + m_offset: 48, + curg: 192, + labels: 344, + hmap_count: 0, + hmap_log2_bucket_count: 9, + hmap_buckets: 16, + }, + "go1.12": { + m_offset: 48, + curg: 192, + labels: 344, + hmap_count: 0, + hmap_log2_bucket_count: 9, + hmap_buckets: 16, + }, + "go1.13": { + m_offset: 48, + curg: 192, + labels: 344, + hmap_count: 0, + hmap_log2_bucket_count: 9, + hmap_buckets: 16, + }, + "go1.14": { + m_offset: 48, + curg: 192, + labels: 344, + hmap_count: 0, + hmap_log2_bucket_count: 9, + hmap_buckets: 16, + }, + "go1.15": { + m_offset: 48, + curg: 192, + labels: 344, + hmap_count: 0, + hmap_log2_bucket_count: 9, + hmap_buckets: 16, + }, + "go1.16": { + m_offset: 48, + curg: 192, + labels: 344, + hmap_count: 0, + hmap_log2_bucket_count: 9, + hmap_buckets: 16, + }, + "go1.17": { + m_offset: 48, + curg: 192, + labels: 360, + hmap_count: 0, + hmap_log2_bucket_count: 9, + hmap_buckets: 16, + }, + "go1.18": { + m_offset: 48, + curg: 192, + labels: 360, + hmap_count: 0, + hmap_log2_bucket_count: 9, + hmap_buckets: 16, + }, + "go1.19": { + m_offset: 48, + curg: 192, + labels: 360, + hmap_count: 0, + hmap_log2_bucket_count: 9, + hmap_buckets: 16, + }, + "go1.20": { + m_offset: 48, + curg: 192, + labels: 360, + hmap_count: 0, + hmap_log2_bucket_count: 9, + hmap_buckets: 16, + }, + "go1.21": { + m_offset: 48, + curg: 192, + labels: 344, + hmap_count: 0, + hmap_log2_bucket_count: 9, + hmap_buckets: 16, + }, + "go1.22": { + m_offset: 48, + curg: 192, + labels: 344, + hmap_count: 0, + hmap_log2_bucket_count: 9, + hmap_buckets: 16, + }, + "go1.23": { + m_offset: 48, + curg: 192, + labels: 352, + hmap_count: 0, + hmap_log2_bucket_count: 9, + hmap_buckets: 16, + }, + "go1.24": { + m_offset: 48, + curg: 192, + labels: 352, + }, +} diff --git a/libpf/convenience.go b/libpf/convenience.go index 1abdb6043..e936f64b2 100644 --- a/libpf/convenience.go +++ b/libpf/convenience.go @@ -4,7 +4,6 @@ package 
libpf // import "go.opentelemetry.io/ebpf-profiler/libpf" import ( - "context" "math/rand/v2" "reflect" "time" @@ -13,24 +12,6 @@ import ( log "github.com/sirupsen/logrus" ) -// SleepWithJitter sleeps for baseDuration +/- jitter (jitter is [0..1]) -func SleepWithJitter(baseDuration time.Duration, jitter float64) { - time.Sleep(AddJitter(baseDuration, jitter)) -} - -// SleepWithJitterAndContext blocks for duration +/- jitter (jitter is [0..1]) or until ctx -// is canceled. -func SleepWithJitterAndContext(ctx context.Context, duration time.Duration, jitter float64) error { - tick := time.NewTicker(AddJitter(duration, jitter)) - defer tick.Stop() - select { - case <-ctx.Done(): - return ctx.Err() - case <-tick.C: - return nil - } -} - // AddJitter adds +/- jitter (jitter is [0..1]) to baseDuration func AddJitter(baseDuration time.Duration, jitter float64) time.Duration { if jitter < 0.0 || jitter > 1.0 { diff --git a/libpf/generics.go b/libpf/generics.go index 6a57009d4..5ba4b52fe 100644 --- a/libpf/generics.go +++ b/libpf/generics.go @@ -24,24 +24,6 @@ func MapKeysToSlice[K comparable, V any](m map[K]V) []K { return slice } -// MapValuesToSlice creates a slice from a map's values. -func MapValuesToSlice[K comparable, V any](m map[K]V) []V { - slice := make([]V, 0, len(m)) - for _, value := range m { - slice = append(slice, value) - } - return slice -} - -// SliceToSet creates a set from a slice, deduplicating it. -func SliceToSet[T comparable](s []T) Set[T] { - set := make(map[T]Void, len(s)) - for _, item := range s { - set[item] = Void{} - } - return set -} - // SliceAllEqual checks whether all items in a slice have a given value. func SliceAllEqual[T comparable](s []T, value T) bool { for _, item := range s { @@ -52,25 +34,3 @@ func SliceAllEqual[T comparable](s []T, value T) bool { return true } - -// SlicesEqual checks whether two slices are element-wise equal. 
-func SlicesEqual[T comparable](a, b []T) bool { - if len(a) != len(b) { - return false - } - for i := 0; i < len(a); i++ { - if a[i] != b[i] { - return false - } - } - return true -} - -// MapSlice returns a new slice by mapping given function over the input slice. -func MapSlice[T, V any](in []T, mapf func(T) V) []V { - ret := make([]V, len(in)) - for idx := range in { - ret[idx] = mapf(in[idx]) - } - return ret -} diff --git a/libpf/interpretertype.go b/libpf/interpretertype.go index d6be81372..6ce05e950 100644 --- a/libpf/interpretertype.go +++ b/libpf/interpretertype.go @@ -40,6 +40,12 @@ const ( // APMInt identifies the pseudo-interpreter for the APM integration. APMInt InterpreterType = 0x100 + + // Go identifies the pseudo-interpreter for Go custom labels support. + Go InterpreterType = 0x101 + + // CustomLabels identifies the pseudo-interpreter for native custom labels support. + CustomLabels InterpreterType = 0x102 ) // Frame converts the interpreter type into the corresponding frame type. 
diff --git a/libpf/pfelf/file.go b/libpf/pfelf/file.go index 5229eee49..93010b6a8 100644 --- a/libpf/pfelf/file.go +++ b/libpf/pfelf/file.go @@ -54,6 +54,12 @@ var ErrSymbolNotFound = errors.New("symbol not found") // ErrNotELF is returned when the file is not an ELF var ErrNotELF = errors.New("not an ELF file") +// ErrNoTbss is returned when the tbss section cannot be found +var ErrNoTbss = errors.New("no thread-local uninitialized data section (tbss)") + +// ErrNoTdata is returned when the tdata section cannot be found +var ErrNoTdata = errors.New("no thread-local initialized data section (tdata)") + // File represents an open ELF file type File struct { // closer is called internally when resources for this File are to be released @@ -419,6 +425,32 @@ func (f *File) Section(name string) *Section { return nil } +// Tbss gets the thread-local uninitialized data section +func (f *File) Tbss() (*Section, error) { + if err := f.LoadSections(); err != nil { + return nil, err + } + for _, sec := range f.Sections { + if sec.Type == elf.SHT_NOBITS && sec.Flags&elf.SHF_TLS != 0 { + return &sec, nil + } + } + return nil, ErrNoTbss +} + +// Tdata gets the thread-local initialized data section +func (f *File) Tdata() (*Section, error) { + if err := f.LoadSections(); err != nil { + return nil, err + } + for _, sec := range f.Sections { + if sec.Type == elf.SHT_PROGBITS && sec.Flags&elf.SHF_TLS != 0 { + return &sec, nil + } + } + return nil, ErrNoTdata +} + // ReadVirtualMemory reads bytes from given virtual address func (f *File) ReadVirtualMemory(p []byte, addr int64) (int, error) { if len(p) == 0 { diff --git a/libpf/trace.go b/libpf/trace.go index e18d454c8..dc997eb56 100644 --- a/libpf/trace.go +++ b/libpf/trace.go @@ -14,6 +14,7 @@ type Trace struct { MappingEnd []Address MappingFileOffsets []uint64 Hash TraceHash + CustomLabels map[string]string } // AppendFrame appends a frame to the columnar frame array without mapping information. 
diff --git a/main.go b/main.go index 17debe3b8..106413b8a 100644 --- a/main.go +++ b/main.go @@ -114,7 +114,6 @@ func mainWithExitCode() exitCode { log.Error(err) return exitFailure } - cfg.HostName, cfg.IPAddress = hostname, sourceIP rep, err := reporter.NewOTLP(&reporter.Config{ CollAgentAddr: cfg.CollAgentAddr, diff --git a/process/process.go b/process/process.go index 760462bbb..bf2a952ff 100644 --- a/process/process.go +++ b/process/process.go @@ -93,6 +93,8 @@ func parseMappings(mapsFile io.Reader) ([]Mapping, error) { } bufPool.Put(scanBuf) }() + + lastPath := "" scanner.Buffer(*scanBuf, 8192) for scanner.Scan() { var fields [6]string @@ -146,7 +148,14 @@ func parseMappings(mapsFile io.Reader) ([]Mapping, error) { } } else { path = trimMappingPath(path) - path = strings.Clone(path) + if path == lastPath { + // Take advantage of the fact that mappings are sorted by path + // and avoid allocating the same string multiple times. + path = lastPath + } else { + path = strings.Clone(path) + lastPath = path + } } vaddr := util.HexToUint64(addrs[0]) diff --git a/processmanager/ebpf/ebpf.go b/processmanager/ebpf/ebpf.go index bb07da91c..60dc9bafb 100644 --- a/processmanager/ebpf/ebpf.go +++ b/processmanager/ebpf/ebpf.go @@ -103,6 +103,8 @@ type ebpfMapsImpl struct { rubyProcs *cebpf.Map v8Procs *cebpf.Map apmIntProcs *cebpf.Map + goProcs *cebpf.Map + clProcs *cebpf.Map // Stackdelta and process related eBPF maps exeIDToStackDeltaMaps []*cebpf.Map @@ -205,6 +207,18 @@ func LoadMaps(ctx context.Context, maps map[string]*cebpf.Map) (EbpfHandler, err } impl.apmIntProcs = apmIntProcs + goProcs, ok := maps["go_procs"] + if !ok { + log.Fatalf("Map go_procs is not available") + } + impl.goProcs = goProcs + + clProcs, ok := maps["cl_procs"] + if !ok { + log.Fatalf("Map cl_procs is not available") + } + impl.clProcs = clProcs + impl.stackDeltaPageToInfo, ok = maps["stack_delta_page_to_info"] if !ok { log.Fatalf("Map stack_delta_page_to_info is not available") @@ -296,6 +310,10 
@@ func (impl *ebpfMapsImpl) getInterpreterTypeMap(typ libpf.InterpreterType) (*ceb return impl.v8Procs, nil case libpf.APMInt: return impl.apmIntProcs, nil + case libpf.Go: + return impl.goProcs, nil + case libpf.CustomLabels: + return impl.clProcs, nil default: return nil, fmt.Errorf("type %d is not (yet) supported", typ) } @@ -749,7 +767,14 @@ func (impl *ebpfMapsImpl) UpdatePidPageMappingInfo(pid libpf.PID, prefix lpm.Pre func (impl *ebpfMapsImpl) DeletePidPageMappingInfo(pid libpf.PID, prefixes []lpm.Prefix) (int, error) { if impl.hasLPMTrieBatchOperations { - return impl.DeletePidPageMappingInfoBatch(pid, prefixes) + deleted, err := impl.DeletePidPageMappingInfoBatch(pid, prefixes) + if err != nil { + // BatchDelete may return early and not run to completion. If that happens, + // fall back to a single Delete pass to avoid leaking map entries. + deleted2, _ := impl.DeletePidPageMappingInfoSingle(pid, prefixes) + return (deleted + deleted2), err + } + return deleted, nil } return impl.DeletePidPageMappingInfoSingle(pid, prefixes) } diff --git a/processmanager/execinfomanager/manager.go b/processmanager/execinfomanager/manager.go index 135da84d9..89f489277 100644 --- a/processmanager/execinfomanager/manager.go +++ b/processmanager/execinfomanager/manager.go @@ -18,7 +18,9 @@ import ( "go.opentelemetry.io/ebpf-profiler/host" "go.opentelemetry.io/ebpf-profiler/interpreter" "go.opentelemetry.io/ebpf-profiler/interpreter/apmint" + "go.opentelemetry.io/ebpf-profiler/interpreter/customlabels" "go.opentelemetry.io/ebpf-profiler/interpreter/dotnet" + "go.opentelemetry.io/ebpf-profiler/interpreter/golang" "go.opentelemetry.io/ebpf-profiler/interpreter/hotspot" "go.opentelemetry.io/ebpf-profiler/interpreter/nodev8" "go.opentelemetry.io/ebpf-profiler/interpreter/perl" @@ -100,6 +102,7 @@ func NewExecutableInfoManager( sdp nativeunwind.StackDeltaProvider, ebpf pmebpf.EbpfHandler, includeTracers types.IncludedTracers, + collectCustomLabels bool, ) (*ExecutableInfoManager, 
error) { // Initialize interpreter loaders. interpreterLoaders := make([]interpreter.Loader, 0) @@ -126,6 +129,9 @@ func NewExecutableInfoManager( } interpreterLoaders = append(interpreterLoaders, apmint.Loader) + if collectCustomLabels { + interpreterLoaders = append(interpreterLoaders, golang.Loader, customlabels.Loader) + } deferredFileIDs, err := lru.NewSynced[host.FileID, libpf.Void](deferredFileIDSize, func(id host.FileID) uint32 { return uint32(id) }) diff --git a/processmanager/manager.go b/processmanager/manager.go index f8de293f8..1e3908b44 100644 --- a/processmanager/manager.go +++ b/processmanager/manager.go @@ -66,7 +66,9 @@ var ( // implementation. func New(ctx context.Context, includeTracers types.IncludedTracers, monitorInterval time.Duration, ebpf pmebpf.EbpfHandler, fileIDMapper FileIDMapper, symbolReporter reporter.SymbolReporter, - sdp nativeunwind.StackDeltaProvider, filterErrorFrames bool) (*ProcessManager, error) { + sdp nativeunwind.StackDeltaProvider, filterErrorFrames bool, + includeEnvVars libpf.Set[string], + collectCustomLabels bool) (*ProcessManager, error) { if fileIDMapper == nil { var err error fileIDMapper, err = newFileIDMapper(lruFileIDCacheSize) @@ -82,7 +84,7 @@ func New(ctx context.Context, includeTracers types.IncludedTracers, monitorInter } elfInfoCache.SetLifetime(elfInfoCacheTTL) - em, err := eim.NewExecutableInfoManager(sdp, ebpf, includeTracers) + em, err := eim.NewExecutableInfoManager(sdp, ebpf, includeTracers, collectCustomLabels) if err != nil { return nil, fmt.Errorf("unable to create ExecutableInfoManager: %v", err) } @@ -101,6 +103,7 @@ func New(ctx context.Context, includeTracers types.IncludedTracers, monitorInter reporter: symbolReporter, metricsAddSlice: metrics.AddSlice, filterErrorFrames: filterErrorFrames, + includeEnvVars: includeEnvVars, } collectInterpreterMetrics(ctx, pm, monitorInterval) @@ -215,9 +218,10 @@ func (pm *ProcessManager) ConvertTrace(trace *host.Trace) (newTrace *libpf.Trace traceLen := 
len(trace.Frames) newTrace = &libpf.Trace{ - Files: make([]libpf.FileID, 0, traceLen), - Linenos: make([]libpf.AddressOrLineno, 0, traceLen), - FrameTypes: make([]libpf.FrameType, 0, traceLen), + Files: make([]libpf.FileID, 0, traceLen), + Linenos: make([]libpf.AddressOrLineno, 0, traceLen), + FrameTypes: make([]libpf.FrameType, 0, traceLen), + CustomLabels: trace.CustomLabels, } for i := 0; i < traceLen; i++ { diff --git a/processmanager/manager_test.go b/processmanager/manager_test.go index a5ca1ed71..af303620a 100644 --- a/processmanager/manager_test.go +++ b/processmanager/manager_test.go @@ -318,6 +318,8 @@ func TestInterpreterConvertTrace(t *testing.T) { nil, &symbolReporterMockup{}, nil, + true, + libpf.Set[string]{}, true) require.NoError(t, err) @@ -403,6 +405,8 @@ func TestNewMapping(t *testing.T) { NewMapFileIDMapper(), symRepMockup, &dummyProvider, + true, + libpf.Set[string]{}, true) require.NoError(t, err) @@ -588,6 +592,8 @@ func TestProcExit(t *testing.T) { NewMapFileIDMapper(), repMockup, &dummyProvider, + true, + libpf.Set[string]{}, true) require.NoError(t, err) defer cancel() diff --git a/processmanager/processinfo.go b/processmanager/processinfo.go index d0e7cf4c6..9582e72a6 100644 --- a/processmanager/processinfo.go +++ b/processmanager/processinfo.go @@ -16,6 +16,7 @@ import ( "fmt" "os" "path" + "strings" "syscall" "time" @@ -88,8 +89,33 @@ func (pm *ProcessManager) updatePidInformation(pid libpf.PID, m *Mapping) (bool, if name, err := os.ReadFile(fmt.Sprintf("/prod/%d/comm", pid)); err == nil { processName = string(name) } + + envVarMap := make(map[string]string, len(pm.includeEnvVars)) + if len(pm.includeEnvVars) > 0 { + if envVars, err := os.ReadFile(fmt.Sprintf("/proc/%d/environ", pid)); err == nil { + // environ has environment variables separated by a null byte (hex: 00) + splittedVars := strings.Split(string(envVars), "\000") + for _, envVar := range splittedVars { + keyValuePair := strings.SplitN(envVar, "=", 2) + + // If the entry 
could not be split at a '=', ignore it + // (last entry of environ might be empty) + if len(keyValuePair) != 2 { + continue + } + + if _, ok := pm.includeEnvVars[keyValuePair[0]]; ok { + envVarMap[keyValuePair[0]] = keyValuePair[1] + } + } + } + } + info = &processInfo{ - meta: ProcessMeta{Name: processName, Executable: exePath}, + meta: ProcessMeta{ + Name: processName, + Executable: exePath, + EnvVariables: envVarMap}, mappings: make(map[libpf.Address]*Mapping), mappingsByFileID: make(map[host.FileID]map[libpf.Address]*Mapping), tsdInfo: nil, @@ -561,6 +587,17 @@ func (pm *ProcessManager) SynchronizeProcess(pr process.Process) { pid := pr.PID() log.Debugf("= PID: %v", pid) + // Abort early if process is waiting for cleanup in ProcessedUntil + pm.mu.Lock() + _, ok := pm.exitEvents[pid] + pm.mu.Unlock() + + if ok { + log.Debugf("PID %v waiting for cleanup, aborting SynchronizeProcess", pid) + pm.ebpf.RemoveReportedPID(pid) + return + } + pm.mappingStats.numProcAttempts.Add(1) start := time.Now() mappings, err := pr.GetMappings() diff --git a/processmanager/types.go b/processmanager/types.go index c70ec5729..70f7e528c 100644 --- a/processmanager/types.go +++ b/processmanager/types.go @@ -96,6 +96,9 @@ type ProcessManager struct { // filterErrorFrames determines whether error frames are dropped by `ConvertTrace`. filterErrorFrames bool + + // includeEnvVars holds a list of env vars that should be captured from processes + includeEnvVars libpf.Set[string] } // Mapping represents an executable memory mapping of a process. 
@@ -141,6 +144,8 @@ type ProcessMeta struct { Name string // executable path retrieved from /proc/PID/exe Executable string + // process env vars from /proc/PID/environ + EnvVariables map[string]string } // processInfo contains information about the executable mappings diff --git a/remotememory/remotememory.go b/remotememory/remotememory.go index 00ca1ed71..52971490e 100644 --- a/remotememory/remotememory.go +++ b/remotememory/remotememory.go @@ -71,6 +71,15 @@ func (rm RemoteMemory) Uint32(addr libpf.Address) uint32 { return binary.LittleEndian.Uint32(buf[:]) } +// Uint32Checked reads a 32-bit unsigned integer from remote memory +func (rm RemoteMemory) Uint32Checked(addr libpf.Address) (uint32, error) { + var buf [4]byte + if err := rm.Read(addr, buf[:]); err != nil { + return 0, err + } + return binary.LittleEndian.Uint32(buf[:]), nil +} + // Uint64 reads a 64-bit unsigned integer from remote memory func (rm RemoteMemory) Uint64(addr libpf.Address) uint64 { var buf [8]byte diff --git a/reporter/base_reporter.go b/reporter/base_reporter.go index 66f5db90d..71c9c3187 100644 --- a/reporter/base_reporter.go +++ b/reporter/base_reporter.go @@ -143,6 +143,7 @@ func (b *baseReporter) ReportTraceEvent(trace *libpf.Trace, meta *samples.TraceE MappingFileOffsets: trace.MappingFileOffsets, Timestamps: []uint64{uint64(meta.Timestamp)}, OffTimes: []int64{meta.OffTime}, + EnvVars: meta.EnvVars, } } diff --git a/reporter/internal/pdata/generate.go b/reporter/internal/pdata/generate.go index 6570abc30..0e55366bc 100644 --- a/reporter/internal/pdata/generate.go +++ b/reporter/internal/pdata/generate.go @@ -12,7 +12,9 @@ import ( log "github.com/sirupsen/logrus" "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/pprofile" - semconv "go.opentelemetry.io/otel/semconv/v1.4.0" + "go.opentelemetry.io/otel/attribute" + + semconv "go.opentelemetry.io/otel/semconv/v1.30.0" "go.opentelemetry.io/ebpf-profiler/libpf" 
"go.opentelemetry.io/ebpf-profiler/reporter/samples" @@ -118,7 +120,7 @@ func (p *Pdata) setProfile( loc := profile.LocationTable().AppendEmpty() loc.SetAddress(uint64(traceInfo.Linenos[i])) attrMgr.AppendOptionalString(loc.AttributeIndices(), - "profile.frame.type", traceInfo.FrameTypes[i].String()) + semconv.ProfileFrameTypeKey, traceInfo.FrameTypes[i].String()) switch frameKind := traceInfo.FrameTypes[i]; frameKind { case libpf.NativeFrame: @@ -153,9 +155,11 @@ func (p *Pdata) setProfile( // semantic convention for build_id, replace these hard coded // strings. attrMgr.AppendOptionalString(mapping.AttributeIndices(), - "process.executable.build_id.gnu", ei.GnuBuildID) + semconv.ProcessExecutableBuildIDGnuKey, + ei.GnuBuildID) attrMgr.AppendOptionalString(mapping.AttributeIndices(), - "process.executable.build_id.htlhash", traceInfo.Files[i].StringNoQuotes()) + semconv.ProcessExecutableBuildIDHtlhashKey, + traceInfo.Files[i].StringNoQuotes()) } loc.SetMappingIndex(locationMappingIndex) case libpf.AbortFrame: @@ -219,6 +223,13 @@ func (p *Pdata) setProfile( attrMgr.AppendInt(sample.AttributeIndices(), semconv.ProcessPIDKey, traceKey.Pid) + for key, value := range traceInfo.EnvVars { + attrMgr.AppendOptionalString( + sample.AttributeIndices(), + attribute.Key("env."+key), + value) + } + if p.ExtraSampleAttrProd != nil { extra := p.ExtraSampleAttrProd.ExtraSampleAttrs(attrMgr, traceKey.ExtraMeta) sample.AttributeIndices().Append(extra...) 
@@ -306,6 +317,7 @@ func getDummyMappingIndex(fileIDtoMapping map[libpf.FileID]int32, mapping := profile.MappingTable().AppendEmpty() mapping.SetFilenameStrindex(getStringMapIndex(stringMap, "")) attrMgr.AppendOptionalString(mapping.AttributeIndices(), - "process.executable.build_id.htlhash", fileID.StringNoQuotes()) + semconv.ProcessExecutableBuildIDHtlhashKey, + fileID.StringNoQuotes()) return locationMappingIndex } diff --git a/reporter/otlp_reporter.go b/reporter/otlp_reporter.go index 6c2e3fccb..a0d005573 100644 --- a/reporter/otlp_reporter.go +++ b/reporter/otlp_reporter.go @@ -14,7 +14,7 @@ import ( log "github.com/sirupsen/logrus" "go.opentelemetry.io/collector/pdata/pprofile" "go.opentelemetry.io/collector/pdata/pprofile/pprofileotlp" - semconv "go.opentelemetry.io/otel/semconv/v1.22.0" + semconv "go.opentelemetry.io/otel/semconv/v1.30.0" "google.golang.org/grpc" "google.golang.org/grpc/credentials" "google.golang.org/grpc/credentials/insecure" diff --git a/reporter/samples/attrmgr_test.go b/reporter/samples/attrmgr_test.go index 34e82ddca..bd03376b4 100644 --- a/reporter/samples/attrmgr_test.go +++ b/reporter/samples/attrmgr_test.go @@ -12,7 +12,7 @@ import ( "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/pprofile" "go.opentelemetry.io/ebpf-profiler/libpf" - semconv "go.opentelemetry.io/otel/semconv/v1.4.0" + semconv "go.opentelemetry.io/otel/semconv/v1.30.0" ) type attributeStruct struct { diff --git a/reporter/samples/samples.go b/reporter/samples/samples.go index 54b92d3a3..466c5ff67 100644 --- a/reporter/samples/samples.go +++ b/reporter/samples/samples.go @@ -15,6 +15,7 @@ type TraceEventMeta struct { CPU int Origin libpf.Origin OffTime int64 + EnvVars map[string]string } // TraceEvents holds known information about a trace. 
@@ -27,6 +28,7 @@ type TraceEvents struct { MappingFileOffsets []uint64 Timestamps []uint64 // in nanoseconds OffTimes []int64 // in nanoseconds + EnvVars map[string]string } // TraceAndMetaKey is the deduplication key for samples. This **must always** @@ -44,6 +46,7 @@ type TraceAndMetaKey struct { ProcessName string // Executable path is retrieved from /proc/PID/exe ExecutablePath string + // ExtraMeta stores extra meta info that may have been produced by a // `SampleAttrProducer` instance. May be nil. ExtraMeta any diff --git a/rust-crates/custom-labels-example/Cargo.toml b/rust-crates/custom-labels-example/Cargo.toml new file mode 100644 index 000000000..a3c8df97a --- /dev/null +++ b/rust-crates/custom-labels-example/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "custom-labels-example" +edition = "2021" +# Once we have the Rust workspace stuff from upstream, set these to true. +# +# version.workspace = true +# rust-version.workspace = true +# license.workspace = true + +[dependencies] +custom-labels = "0.4.0" +rand = "0.9.0" + +[build-dependencies] +custom-labels = "0.4.0" \ No newline at end of file diff --git a/rust-crates/custom-labels-example/build.rs b/rust-crates/custom-labels-example/build.rs new file mode 100644 index 000000000..be4c68863 --- /dev/null +++ b/rust-crates/custom-labels-example/build.rs @@ -0,0 +1,3 @@ +fn main() { + custom_labels::build::emit_build_instructions(); +} diff --git a/rust-crates/custom-labels-example/src/main.rs b/rust-crates/custom-labels-example/src/main.rs new file mode 100644 index 000000000..97510ddf0 --- /dev/null +++ b/rust-crates/custom-labels-example/src/main.rs @@ -0,0 +1,29 @@ +use std::time::{Duration, Instant}; + +use rand::distr::Alphanumeric; +use rand::Rng; + +fn rand_str() -> String { + String::from_utf8( + rand::rng() + .sample_iter(&Alphanumeric) + .take(16) + .collect::>(), + ) + .unwrap() +} + +fn main() { + let mut last_update = Instant::now(); + + loop { + custom_labels::with_label("l1", rand_str(), || 
{ + custom_labels::with_label("l2", rand_str(), || loop { + if last_update.elapsed() >= Duration::from_secs(10) { + break; + } + }) + }); + last_update = Instant::now(); + } +} diff --git a/rust-crates/symblib-capi/c/symblib.h b/rust-crates/symblib-capi/c/symblib.h index df3c2be0a..24141aa11 100644 --- a/rust-crates/symblib-capi/c/symblib.h +++ b/rust-crates/symblib-capi/c/symblib.h @@ -138,6 +138,44 @@ extern SymblibStatus symblib_retpadextr_submit( // Frees a return pad extractor. extern void symblib_retpadextr_free(SymblibRetPadExtractor* extr); +// Opaque handle to SymblibPointResolver. +typedef struct SymblibPointResolver SymblibPointResolver; + +// Creates a new SymblibPointResolver. +extern SymblibStatus symblib_goruntime_new( + const char* executable, + SymblibPointResolver** runtime // out arg +); + +// Frees a SymblibPointResolver. +extern void symblib_goruntime_free(SymblibPointResolver* runtime); + +// Contains information about a symbol and its origin. +typedef struct SymblibResolvedSymbol { + uint64_t start_addr; + SymblibString function_name; + SymblibString file_name; + uint32_t line_number; +} SymblibResolvedSymbol; + +// Enveloping struct that contains len number of symbols in data. +typedef struct SymblibSlice_SymblibResolvedSymbol { + const SymblibResolvedSymbol* data; + size_t len; +} SymblibSlice_SymblibResolvedSymbol; + +// Single point lookup for pc using SymblibPointResolver. +SymblibStatus symblib_point_resolver_symbols_for_pc( + const SymblibPointResolver* resolver, + uint64_t pc, + SymblibSlice_SymblibResolvedSymbol** symbols // out arg +); + +// Frees a SymblibSlice_SymblibResolvedSymbol. 
+void symblib_slice_symblibresolved_symbol_free( + SymblibSlice_SymblibResolvedSymbol* slice +); + #ifdef __cplusplus } #endif diff --git a/rust-crates/symblib-capi/src/gosym.rs b/rust-crates/symblib-capi/src/gosym.rs new file mode 100644 index 000000000..186388faf --- /dev/null +++ b/rust-crates/symblib-capi/src/gosym.rs @@ -0,0 +1,67 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +use crate::{FfiResult, StatusCode, SymblibPointResolver}; +use std::ffi::{c_char, CStr}; +use std::mem; +use std::path::Path; +use symblib::symbconv::{PointResolver, ResolvedSymbol, Result as SymconvResult}; +use symblib::{gosym::GoRuntimeInfo, objfile}; + +pub struct SymblibGoRuntime { + #[allow(unused)] + obj: Box, + runtime: GoRuntimeInfo<'static>, +} + +impl PointResolver for SymblibGoRuntime { + fn symbols_for_pc(&self, pc: symblib::VirtAddr) -> SymconvResult> { + self.runtime.symbols_for_pc(pc) + } +} + +#[no_mangle] +pub unsafe extern "C" fn symblib_goruntime_new( + executable: *const c_char, + runtime: *mut *mut SymblibPointResolver, +) -> StatusCode { + match goruntime_new_impl(executable, runtime) { + Ok(()) => StatusCode::Ok, + Err(e) => e, + } +} + +unsafe fn goruntime_new_impl( + executable: *const c_char, + runtime: *mut *mut SymblibPointResolver, +) -> FfiResult { + let executable = CStr::from_ptr(executable) + .to_str() + .map(Path::new) + .map_err(|_| StatusCode::BadUtf8)?; + + let obj = Box::new(objfile::File::load(executable)?); + let obj_reader = obj.parse()?; + let go_runtime = GoRuntimeInfo::open(&obj_reader)?; + + // Transmute away lifetime to allow for self-referential struct. 
+ let go_runtime: GoRuntimeInfo<'static> = mem::transmute(go_runtime); + + let resolver = SymblibGoRuntime { + obj, + runtime: go_runtime, + }; + + let point_resolver = Box::new(SymblibPointResolver::new( + Box::new(resolver) as Box + )); + *runtime = Box::into_raw(point_resolver); + Ok(()) +} + +#[no_mangle] +pub unsafe extern "C" fn symblib_goruntime_free(runtime: *mut SymblibPointResolver) { + if !runtime.is_null() { + drop(Box::from_raw(runtime)); + } +} diff --git a/rust-crates/symblib-capi/src/lib.rs b/rust-crates/symblib-capi/src/lib.rs index b6664ad70..fda7b8716 100644 --- a/rust-crates/symblib-capi/src/lib.rs +++ b/rust-crates/symblib-capi/src/lib.rs @@ -5,12 +5,16 @@ mod ffislice; mod ffistr; +mod gosym; +mod pointresolver; mod rangeextr; mod retpadextr; mod status; pub use ffislice::*; pub use ffistr::*; +pub use gosym::*; +pub use pointresolver::*; pub use rangeextr::*; pub use retpadextr::*; pub use status::*; diff --git a/rust-crates/symblib-capi/src/pointresolver.rs b/rust-crates/symblib-capi/src/pointresolver.rs new file mode 100644 index 000000000..9d4db88ec --- /dev/null +++ b/rust-crates/symblib-capi/src/pointresolver.rs @@ -0,0 +1,66 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +use crate::{StatusCode, SymblibSlice, SymblibString}; +use symblib::symbconv; +use symblib::symbconv::PointResolver; +use symblib::VirtAddr; + +#[repr(C)] +#[derive(Debug)] +pub struct SymblibResolvedSymbol { + pub start_addr: VirtAddr, + pub function_name: SymblibString, + pub file_name: SymblibString, // may be empty + pub line_number: u32, // 0 = unknown +} + +impl From for SymblibResolvedSymbol { + fn from(sym: symbconv::ResolvedSymbol) -> Self { + Self { + start_addr: sym.start_addr, + function_name: sym.function_name.into(), + file_name: sym.file_name.unwrap_or("".to_string()).into(), + line_number: sym.line_number.unwrap_or(0), + } + } +} + +#[repr(C)] +pub struct SymblibPointResolver { + inner: Box, +} + +impl 
SymblibPointResolver { + pub fn new(resolver: Box) -> Self { + Self { inner: resolver } + } +} + +#[no_mangle] +pub extern "C" fn symblib_point_resolver_symbols_for_pc( + resolver: &SymblibPointResolver, + pc: VirtAddr, + out_symbols: *mut *mut SymblibSlice, +) -> StatusCode { + let symbols: Vec<_> = match resolver.inner.symbols_for_pc(pc) { + Ok(syms) => syms.into_iter().map(Into::into).collect(), + Err(e) => return StatusCode::from(e), + }; + + unsafe { + *out_symbols = Box::into_raw(Box::new(symbols.into())); + } + StatusCode::Ok +} + +#[no_mangle] +pub extern "C" fn symblib_slice_symblibresolved_symbol_free( + slice: *mut SymblibSlice, +) { + if !slice.is_null() { + unsafe { + drop(Box::from_raw(slice)); + } + } +} diff --git a/rust-crates/symblib-capi/src/status.rs b/rust-crates/symblib-capi/src/status.rs index 54686aa2f..8ecfb69f2 100644 --- a/rust-crates/symblib-capi/src/status.rs +++ b/rust-crates/symblib-capi/src/status.rs @@ -4,7 +4,7 @@ //! Defines FFI error codes and their conversion from Rust error types. 
use std::io; -use symblib::{dwarf, objfile, retpads, symbconv}; +use symblib::{dwarf, gosym, objfile, retpads, symbconv}; pub type FfiResult = Result; @@ -42,6 +42,9 @@ pub enum StatusCode { #[error("The channel was already closed in a previous call")] AlreadyClosed = 8, + + #[error("Point resolver error")] + PointResolver = 9, } impl From for FfiResult { @@ -117,3 +120,9 @@ impl From for StatusCode { Self::Retpad } } + +impl From for StatusCode { + fn from(_: gosym::Error) -> Self { + StatusCode::Symbconv + } +} diff --git a/rust-crates/symblib/src/gosym/mod.rs b/rust-crates/symblib/src/gosym/mod.rs index 6f8394a4d..3fe129823 100644 --- a/rust-crates/symblib/src/gosym/mod.rs +++ b/rust-crates/symblib/src/gosym/mod.rs @@ -17,7 +17,7 @@ mod errors; pub use errors::*; mod raw; -use crate::{objfile, VirtAddr}; +use crate::{objfile, symbconv, VirtAddr}; use fallible_iterator::FallibleIterator; use std::ops::Range; @@ -540,3 +540,49 @@ fn range_rel2abs(base: VirtAddr, rng: Range) -> Range { + /// NOTE: this currently doesn't support inline functions + fn symbols_for_pc(&self, pc: VirtAddr) -> symbconv::Result> { + let func = match self.find_func(pc) { + Ok(Some(func)) => func, + Ok(None) => return Ok(Vec::new()), + Err(e) => return Err(symbconv::Error::Go(symbconv::go::Error::Gosym(e))), + }; + + let mut symbols = Vec::new(); + let mut source_file = None; + let mut line_number = None; + + // For file mappings + let mut file_iter = func + .file_mapping() + .map_err(|e| symbconv::Error::Go(symbconv::go::Error::Gosym(e)))?; + while let Ok(Some((range, file))) = file_iter.next() { + if range.contains(&VirtAddr::from(pc)) { + source_file = Some(file.unwrap_or("").into()); + break; + } + } + + // For line mappings + let mut line_iter = func + .line_mapping() + .map_err(|e| symbconv::Error::Go(symbconv::go::Error::Gosym(e)))?; + while let Ok(Some((range, line))) = line_iter.next() { + if range.contains(&VirtAddr::from(pc)) { + line_number = Some(line.unwrap_or(0)); + break; 
+ } + } + + symbols.push(symbconv::ResolvedSymbol { + start_addr: func.start_addr(), + function_name: func.name().ok().map(|s| s.to_string()), + file_name: source_file, + line_number: line_number, + }); + + Ok(symbols) + } +} diff --git a/rust-crates/symblib/src/symbconv/mod.rs b/rust-crates/symblib/src/symbconv/mod.rs index 4708a8426..dcb68a136 100644 --- a/rust-crates/symblib/src/symbconv/mod.rs +++ b/rust-crates/symblib/src/symbconv/mod.rs @@ -3,7 +3,7 @@ //! Extract symbol info and convert it to [`symbfile`] format. -use crate::{objfile, symbfile, AnyError}; +use crate::{objfile, symbfile, AnyError, VirtAddr}; use std::io; /// Result type shorthand. @@ -79,6 +79,27 @@ pub trait RangeExtractor { } } +/// Holds information about a symbol and its origin. +pub struct ResolvedSymbol { + /// Start address of a symbol + pub start_addr: VirtAddr, + /// Function name associated with an address. + pub function_name: Option, + /// File name that holds this function. + pub file_name: Option, + /// Line number associated with this virtual address. + pub line_number: Option, +} + +/// Common interface to resolve symbols for a specific program counter address. +pub trait PointResolver { + /// Returns all symbols that match the given program counter address. 
+ /// + /// The returned vector contains all resolved symbols at the given address, + /// which can include both the direct function and any inline frames + fn symbols_for_pc(&self, pc: VirtAddr) -> Result>; +} + fn _assert_obj_safe(_: &dyn RangeExtractor) {} pub mod dwarf; diff --git a/support/ebpf/extmaps.h b/support/ebpf/extmaps.h index 6cd347f19..1650a0a1f 100644 --- a/support/ebpf/extmaps.h +++ b/support/ebpf/extmaps.h @@ -18,6 +18,7 @@ extern bpf_map_def inhibit_events; extern bpf_map_def interpreter_offsets; extern bpf_map_def system_config; extern bpf_map_def trace_events; +extern bpf_map_def go_procs; #if defined(TESTING_COREDUMP) diff --git a/support/ebpf/go_labels.ebpf.c b/support/ebpf/go_labels.ebpf.c new file mode 100644 index 000000000..89db4f309 --- /dev/null +++ b/support/ebpf/go_labels.ebpf.c @@ -0,0 +1,270 @@ +// This file contains the code for extracting custom labels from Go runtime. + +#include "bpfdefs.h" +#include "kernel.h" +#include "tracemgmt.h" +#include "tsd.h" +#include "types.h" +#include "util.h" + +static inline __attribute__((__always_inline__)) void +process_value(GoMapBucket *map_value, CustomLabelsArray *out, unsigned i) +{ + if (map_value->tophash[i] == 0) + return; + if (out->len >= MAX_CUSTOM_LABELS) + return; + CustomLabel *lbl = &out->labels[out->len]; + if (map_value->keys[i].str != NULL) { + unsigned klen = MIN(map_value->keys[i].len, CUSTOM_LABEL_MAX_KEY_LEN - 1); + long res = bpf_probe_read_user(lbl->key, klen, map_value->keys[i].str); + if (res) { + DEBUG_PRINT( + "cl: failed to read key for custom label (%lx): %ld", + (unsigned long)map_value->keys[i].str, + res); + return; + } + unsigned vlen = MIN(map_value->values[i].len, CUSTOM_LABEL_MAX_VAL_LEN - 1); + res = bpf_probe_read_user(lbl->val, vlen, map_value->values[i].str); + if (res) { + DEBUG_PRINT("cl: failed to read value for custom label: %ld", res); + return; + } + } + out->len++; +} + +static inline __attribute__((__always_inline__)) bool 
+process_bucket(PerCPURecord *record, void *label_buckets, int j) +{ + CustomLabelsArray *out = &record->trace.custom_labels; + GoMapBucket *map_value = &record->goMapBucket; + long res = + bpf_probe_read_user(map_value, sizeof(GoMapBucket), label_buckets + (j * sizeof(GoMapBucket))); + if (res < 0) { + return false; + } + + process_value(map_value, out, 0); + process_value(map_value, out, 1); + process_value(map_value, out, 2); + process_value(map_value, out, 3); + process_value(map_value, out, 4); + process_value(map_value, out, 5); + process_value(map_value, out, 6); + process_value(map_value, out, 7); + + return false; +} + +static inline __attribute__((__always_inline__)) void +process_slice_pair(PerCPURecord *record, struct GoSlice *labels_slice, int i) +{ + CustomLabelsArray *out = &record->trace.custom_labels; + if (out->len >= MAX_CUSTOM_LABELS) + return; + + CustomLabel *lbl = &out->labels[out->len]; + void *str_addr = (char *)labels_slice->array + i * sizeof(struct GoString) * 2; + long res = bpf_probe_read_user(&record->labels, sizeof(struct GoString) * 2, str_addr); + if (res < 0) { + DEBUG_PRINT( + "cl: failed to read strings from labels slice (%lx): %ld", (unsigned long)str_addr, res); + return; + } + unsigned klen = MIN(record->labels[0].len, CUSTOM_LABEL_MAX_KEY_LEN - 1); + res = bpf_probe_read_user(lbl->key, klen, record->labels[0].str); + if (res) { + DEBUG_PRINT( + "cl: failed to read key for custom label (%lx): %ld", + (unsigned long)record->labels[0].str, + res); + return; + } + unsigned vlen = MIN(record->labels[1].len, CUSTOM_LABEL_MAX_VAL_LEN - 1); + res = bpf_probe_read_user(lbl->val, vlen, record->labels[1].str); + if (res) { + DEBUG_PRINT( + "cl: failed to read key for custom label (%lx): %ld", + (unsigned long)record->labels[1].str, + res); + return; + } + out->len++; +} + +static inline __attribute__((__always_inline__)) bool +get_go_custom_labels_from_slice(struct pt_regs *ctx, PerCPURecord *record, void *labels_slice_ptr) +{ + // 
https://github.com/golang/go/blob/80e2e474/src/runtime/pprof/label.go#L20 + struct GoSlice labels_slice; + long res = bpf_probe_read_user(&labels_slice, sizeof(struct GoSlice), labels_slice_ptr); + if (res < 0) { + DEBUG_PRINT( + "cl: failed to read value for labels slice (%lx): %ld", (unsigned long)labels_slice_ptr, res); + return false; + } + + u64 label_count = MIN(MAX_CUSTOM_LABELS, labels_slice.len); + switch (label_count) { + case 10: process_slice_pair(record, &labels_slice, 9); + case 9: process_slice_pair(record, &labels_slice, 8); + case 8: process_slice_pair(record, &labels_slice, 7); + case 7: process_slice_pair(record, &labels_slice, 6); + case 6: process_slice_pair(record, &labels_slice, 5); + case 5: process_slice_pair(record, &labels_slice, 4); + case 4: process_slice_pair(record, &labels_slice, 3); + case 3: process_slice_pair(record, &labels_slice, 2); + case 2: process_slice_pair(record, &labels_slice, 1); + case 1: process_slice_pair(record, &labels_slice, 0); + } + + return true; +} + +static inline __attribute__((__always_inline__)) bool get_go_custom_labels_from_map( + struct pt_regs *ctx, PerCPURecord *record, void *labels_map_ptr_ptr, GoCustomLabelsOffsets *offs) +{ + void *labels_map_ptr; + long res = bpf_probe_read_user(&labels_map_ptr, sizeof(labels_map_ptr), labels_map_ptr_ptr); + if (res < 0) { + DEBUG_PRINT( + "cl: failed to read value for labels_map_ptr (%lx): %ld", + (unsigned long)labels_map_ptr_ptr, + res); + return false; + } + + u64 labels_count = 0; + res = bpf_probe_read_user(&labels_count, sizeof(labels_count), labels_map_ptr + offs->hmap_count); + if (res < 0) { + DEBUG_PRINT("cl: failed to read value for labels_count: %ld", res); + return false; + } + if (labels_count == 0) { + DEBUG_PRINT("cl: no labels"); + return false; + } + + unsigned char log_2_bucket_count; + res = bpf_probe_read_user( + &log_2_bucket_count, sizeof(log_2_bucket_count), labels_map_ptr + offs->hmap_log2_bucket_count); + if (res < 0) { + 
DEBUG_PRINT("cl: failed to read value for bucket_count: %ld", res); + return false; + } + void *label_buckets; + res = + bpf_probe_read_user(&label_buckets, sizeof(label_buckets), labels_map_ptr + offs->hmap_buckets); + if (res < 0) { + DEBUG_PRINT("cl: failed to read value for label_buckets: %ld", res); + return false; + } + + // Manually unroll loop to support 4.19 kernel, auto unroll doesn't work as well + // and we can't support as many buckets. + u64 bucket_count = MIN(MAX_CUSTOM_LABELS, 1 << log_2_bucket_count); + switch (bucket_count) { + case 10: + if (process_bucket(record, label_buckets, 9)) + return true; + case 9: + if (process_bucket(record, label_buckets, 8)) + return true; + case 8: + if (process_bucket(record, label_buckets, 7)) + return true; + case 7: + if (process_bucket(record, label_buckets, 6)) + return true; + case 6: + if (process_bucket(record, label_buckets, 5)) + return true; + case 5: + if (process_bucket(record, label_buckets, 4)) + return true; + case 4: + if (process_bucket(record, label_buckets, 3)) + return true; + case 3: + if (process_bucket(record, label_buckets, 2)) + return true; + case 2: + if (process_bucket(record, label_buckets, 1)) + return true; + case 1: + if (process_bucket(record, label_buckets, 0)) + return true; + } + + return false; +} + +// Go processes store the current goroutine in thread local store. From there +// this reads the g (aka goroutine) struct, then the m (the actual operating +// system thread) of that goroutine, and finally curg (current goroutine). This +// chain is necessary because getg().m.curg points to the current user g +// assigned to the thread (curg == getg() when not on the system stack). curg +// may be nil if there is no user g, such as when running in the scheduler. If +// curg is nil, then g is either a system stack (called g0) or a signal handler +// g (gsignal). Neither one will ever have label. 
+static inline __attribute__((__always_inline__)) bool +get_go_custom_labels(struct pt_regs *ctx, PerCPURecord *record, GoCustomLabelsOffsets *offs) +{ + long res; + + size_t curg_ptr_addr; + res = bpf_probe_read_user( + &curg_ptr_addr, sizeof(void *), (void *)(record->customLabelsState.go_m_ptr + offs->curg)); + if (res < 0) { + DEBUG_PRINT("cl: failed to read value for m_ptr->curg: %ld", res); + return false; + } + + void *labels_ptr; + res = bpf_probe_read_user(&labels_ptr, sizeof(void *), (void *)(curg_ptr_addr + offs->labels)); + if (res < 0) { + DEBUG_PRINT( + "cl: failed to read value for curg->labels (%lx->%lx): %ld", + (unsigned long)curg_ptr_addr, + (unsigned long)offs->labels, + res); + return false; + } + + if (offs->hmap_buckets == 0) { + // go 1.24+ labels is a slice + return get_go_custom_labels_from_slice(ctx, record, labels_ptr); + } + + // go 1.23- labels is a map + return get_go_custom_labels_from_map(ctx, record, labels_ptr, offs); +} + +// go_labels is the entrypoint for extracting custom labels from Go runtime. 
+static inline __attribute__((__always_inline__)) int go_labels(struct pt_regs *ctx) +{ + PerCPURecord *record = get_per_cpu_record(); + if (!record) + return -1; + + u32 pid = record->trace.pid; + GoCustomLabelsOffsets *offsets = bpf_map_lookup_elem(&go_procs, &pid); + if (!offsets) { + DEBUG_PRINT("cl: no offsets, %d not recognized as a go binary", pid); + return -1; + } + DEBUG_PRINT( + "cl: go offsets found, %d recognized as a go binary: m_ptr: %lx", + pid, + (unsigned long)record->customLabelsState.go_m_ptr); + bool success = get_go_custom_labels(ctx, record, offsets); + if (!success) { + increment_metric(metricID_UnwindGoCustomLabelsFailures); + } + + tail_call(ctx, PROG_UNWIND_STOP); + return 0; +} +MULTI_USE_FUNC(go_labels) diff --git a/support/ebpf/interpreter_dispatcher.ebpf.c b/support/ebpf/interpreter_dispatcher.ebpf.c index 934855681..829263d0b 100644 --- a/support/ebpf/interpreter_dispatcher.ebpf.c +++ b/support/ebpf/interpreter_dispatcher.ebpf.c @@ -3,9 +3,11 @@ // perf event and will call the appropriate tracer for a given process #include "bpfdefs.h" +#include "kernel.h" #include "tracemgmt.h" #include "tsd.h" #include "types.h" +#include "util.h" // Begin shared maps @@ -124,6 +126,173 @@ bpf_map_def SEC("maps") apm_int_procs = { .max_entries = 128, }; +bpf_map_def SEC("maps") go_procs = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(pid_t), + .value_size = sizeof(GoCustomLabelsOffsets), + .max_entries = 128, +}; + +bpf_map_def SEC("maps") cl_procs = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(pid_t), + .value_size = sizeof(NativeCustomLabelsProcInfo), + .max_entries = 128, +}; + +static inline __attribute__((__always_inline__)) void * +get_m_ptr(struct GoCustomLabelsOffsets *offs, UnwindState *state) +{ + long res; + + size_t g_addr; +#if defined(__x86_64__) + u64 g_addr_offset = 0xfffffffffffffff8; + void *tls_base = NULL; + res = tsd_get_base(&tls_base); + if (res < 0) { + DEBUG_PRINT("cl: failed to get tsd base; can't read 
m_ptr"); + return NULL; + } + + res = bpf_probe_read_user(&g_addr, sizeof(void *), (void *)((u64)tls_base + g_addr_offset)); + if (res < 0) { + DEBUG_PRINT("cl: failed to read g_addr, tls_base(%lx)", (unsigned long)tls_base); + return NULL; + } +#elif defined(__aarch64__) + g_addr = state->r28; +#endif + + DEBUG_PRINT("cl: reading m_ptr_addr at 0x%lx + 0x%x", g_addr, offs->m_offset); + void *m_ptr_addr; + res = bpf_probe_read_user(&m_ptr_addr, sizeof(void *), (void *)(g_addr + offs->m_offset)); + if (res < 0) { + DEBUG_PRINT("cl: failed m_ptr_addr"); + return NULL; + } + + return m_ptr_addr; +} + +static inline __attribute__((__always_inline__)) void +maybe_add_go_custom_labels(struct pt_regs *ctx, PerCPURecord *record) +{ + u32 pid = record->trace.pid; + // The Go label extraction code is too big to fit in this program, so we need to + // tail call it, in order to keep the hashing and clearing code in this program it + // will tail call back to us with this bool set. + if (!record->state.processed_go_labels) { + GoCustomLabelsOffsets *offsets = bpf_map_lookup_elem(&go_procs, &pid); + if (!offsets) { + DEBUG_PRINT("cl: no offsets, %d not recognized as a go binary", pid); + return; + } + + void *m_ptr_addr = get_m_ptr(offsets, &record->state); + if (!m_ptr_addr) { + return; + } + record->customLabelsState.go_m_ptr = m_ptr_addr; + + DEBUG_PRINT("cl: trace is within a process with Go custom labels enabled"); + increment_metric(metricID_UnwindGoCustomLabelsAttempts); + record->state.processed_go_labels = true; + tail_call(ctx, PROG_GO_LABELS); + } +} + +static inline __attribute__((__always_inline__)) bool +get_native_custom_labels(PerCPURecord *record, NativeCustomLabelsProcInfo *proc) +{ + u64 tsd_base; + if (tsd_get_base((void **)&tsd_base) != 0) { + increment_metric(metricID_UnwindNativeCustomLabelsErrReadTsdBase); + DEBUG_PRINT("cl: failed to get TSD base for native custom labels"); + return false; + } + + u64 offset = tsd_base + proc->tls_offset; + 
DEBUG_PRINT("cl: native custom labels data at 0x%llx", offset); + + NativeCustomLabelsSet *p_current_set; + int err; + if ((err = bpf_probe_read_user(&p_current_set, sizeof(void *), (void *)(offset)))) { + increment_metric(metricID_UnwindNativeCustomLabelsErrReadData); + DEBUG_PRINT("Failed to read custom labels current set pointer: %d", err); + return false; + } + + if (!p_current_set) { + DEBUG_PRINT("Null labelset"); + record->trace.custom_labels.len = 0; + return true; + } + + NativeCustomLabelsSet current_set; + if ((err = bpf_probe_read_user(&current_set, sizeof(current_set), p_current_set))) { + increment_metric(metricID_UnwindNativeCustomLabelsErrReadData); + DEBUG_PRINT("cl: failed to read custom labels data: %d", err); + return false; + } + + DEBUG_PRINT("cl: native custom labels count: %lu", current_set.count); + + unsigned ct = 0; + CustomLabelsArray *out = &record->trace.custom_labels; + +#pragma unroll + for (int i = 0; i < MAX_CUSTOM_LABELS; i++) { + if (i >= current_set.count) + break; + NativeCustomLabel *lbl_ptr = current_set.storage + i; + if ((err = bpf_probe_read_user( + &record->nativeCustomLabel, sizeof(NativeCustomLabel), (void *)(lbl_ptr)))) { + increment_metric(metricID_UnwindNativeCustomLabelsErrReadData); + DEBUG_PRINT("cl: failed to read label storage struct: %d", err); + return false; + } + NativeCustomLabel *lbl = &record->nativeCustomLabel; + if (!lbl->key.buf) + continue; + CustomLabel *out_lbl = &out->labels[ct]; + unsigned klen = MIN(lbl->key.len, CUSTOM_LABEL_MAX_KEY_LEN - 1); + if ((err = bpf_probe_read_user(out_lbl->key, klen, (void *)lbl->key.buf))) { + increment_metric(metricID_UnwindNativeCustomLabelsErrReadKey); + DEBUG_PRINT("cl: failed to read label key: %d", err); + goto exit; + } + unsigned vlen = MIN(lbl->value.len, CUSTOM_LABEL_MAX_VAL_LEN - 1); + if ((err = bpf_probe_read_user(out_lbl->val, vlen, (void *)lbl->value.buf))) { + increment_metric(metricID_UnwindNativeCustomLabelsErrReadValue); + DEBUG_PRINT("cl: failed to read 
label value: %d", err); + goto exit; + } + ++ct; + } +exit: + out->len = ct; + increment_metric(metricID_UnwindNativeCustomLabelsReadSuccesses); + return true; +} + +static inline __attribute__((__always_inline__)) void +maybe_add_native_custom_labels(PerCPURecord *record) +{ + u32 pid = record->trace.pid; + NativeCustomLabelsProcInfo *proc = bpf_map_lookup_elem(&cl_procs, &pid); + if (!proc) { + DEBUG_PRINT("cl: %d does not support native custom labels", pid); + return; + } + DEBUG_PRINT("cl: trace is within a process with native custom labels enabled"); + bool success = get_native_custom_labels(record, proc); + if (success) + increment_metric(metricID_UnwindNativeCustomLabelsAddSuccesses); + else + increment_metric(metricID_UnwindNativeCustomLabelsAddErrors); +} + static inline __attribute__((__always_inline__)) void maybe_add_apm_info(Trace *trace) { u32 pid = trace->pid; // verifier needs this to be on stack on 4.15 kernel @@ -182,6 +351,9 @@ static inline __attribute__((__always_inline__)) int unwind_stop(struct pt_regs Trace *trace = &record->trace; UnwindState *state = &record->state; + // Do Go first since we might tail call out and back again. 
+ maybe_add_go_custom_labels(ctx, record); + maybe_add_native_custom_labels(record); maybe_add_apm_info(trace); // If the stack is otherwise empty, push an error for that: we should diff --git a/support/ebpf/native_stack_trace.ebpf.c b/support/ebpf/native_stack_trace.ebpf.c index 62c6d5c63..7065a43ba 100644 --- a/support/ebpf/native_stack_trace.ebpf.c +++ b/support/ebpf/native_stack_trace.ebpf.c @@ -479,7 +479,9 @@ static ErrorCode unwind_one_frame(u64 pid, u32 frame_idx, struct UnwindState *st state->sp = rt_regs[31]; state->fp = rt_regs[29]; state->lr = normalize_pac_ptr(rt_regs[30]); + state->r7 = rt_regs[7]; state->r22 = rt_regs[22]; + state->r28 = rt_regs[28]; state->return_address = false; state->lr_invalid = false; DEBUG_PRINT("signal frame"); diff --git a/support/ebpf/tracemgmt.h b/support/ebpf/tracemgmt.h index cc2816ac9..468d08761 100644 --- a/support/ebpf/tracemgmt.h +++ b/support/ebpf/tracemgmt.h @@ -214,12 +214,15 @@ static inline PerCPURecord *get_pristine_per_cpu_record() record->state.r13 = 0; #elif defined(__aarch64__) record->state.lr = 0; + record->state.r7 = 0; record->state.r22 = 0; record->state.lr_invalid = false; + record->state.r28 = 0; #endif record->state.return_address = false; record->state.error_metric = -1; record->state.unwind_error = ERR_OK; + record->state.processed_go_labels = false; record->perlUnwindState.stackinfo = 0; record->perlUnwindState.cop = 0; record->pythonUnwindState.py_frame = 0; @@ -229,6 +232,7 @@ static inline PerCPURecord *get_pristine_per_cpu_record() record->unwindersDone = 0; record->tailCalls = 0; record->ratelimitAction = RATELIMIT_ACTION_DEFAULT; + record->customLabelsState.go_m_ptr = NULL; Trace *trace = &record->trace; trace->kernel_stack_id = -1; @@ -240,6 +244,12 @@ static inline PerCPURecord *get_pristine_per_cpu_record() trace->apm_trace_id.as_int.lo = 0; trace->apm_transaction_id.as_int = 0; + u64 *labels_space = (u64 *)&trace->custom_labels; +#pragma unroll + for (int i = 0; i < 
sizeof(CustomLabelsArray) / 8; i++) { + labels_space[i] = 0; + } + return record; } @@ -549,6 +559,7 @@ copy_state_regs(UnwindState *state, struct pt_regs *regs, bool interrupted_kerne state->r9 = regs->r9; state->r11 = regs->r11; state->r13 = regs->r13; + state->r14 = regs->r14; state->r15 = regs->r15; // Treat syscalls as return addresses, but not IRQ handling, page faults, etc.. @@ -565,7 +576,9 @@ copy_state_regs(UnwindState *state, struct pt_regs *regs, bool interrupted_kerne state->sp = regs->sp; state->fp = regs->regs[29]; state->lr = normalize_pac_ptr(regs->regs[30]); + state->r7 = regs->regs[7]; state->r22 = regs->regs[22]; + state->r28 = regs->regs[28]; // Treat syscalls as return addresses, but not IRQ handling, page faults, etc.. // https://github.com/torvalds/linux/blob/2ef5971ff3/arch/arm64/include/asm/ptrace.h#L118 diff --git a/support/ebpf/tracer.ebpf.release.amd64 b/support/ebpf/tracer.ebpf.release.amd64 index c9fd60c3b..393b2c195 100644 Binary files a/support/ebpf/tracer.ebpf.release.amd64 and b/support/ebpf/tracer.ebpf.release.amd64 differ diff --git a/support/ebpf/tracer.ebpf.release.arm64 b/support/ebpf/tracer.ebpf.release.arm64 index 97ae2093e..0014cbaf6 100644 Binary files a/support/ebpf/tracer.ebpf.release.arm64 and b/support/ebpf/tracer.ebpf.release.arm64 differ diff --git a/support/ebpf/types.h b/support/ebpf/types.h index 9a0cb63ba..8d7a19b4f 100644 --- a/support/ebpf/types.h +++ b/support/ebpf/types.h @@ -274,10 +274,10 @@ enum { // number of failures to get TSD base for APM correlation metricID_UnwindApmIntErrReadTsdBase, - // number of failures read the APM correlation pointer + // number of failures to read the APM correlation pointer metricID_UnwindApmIntErrReadCorrBufPtr, - // number of failures read the APM correlation buffer + // number of failures to read the APM correlation buffer metricID_UnwindApmIntErrReadCorrBuf, // number of successful reads of APM correlation info @@ -301,6 +301,33 @@ enum { // number of failures to unwind 
code object due to its large size metricID_UnwindDotnetErrCodeTooLarge, + // number of attempts to read Go custom labels + metricID_UnwindGoCustomLabelsAttempts, + + // number of failures to read Go custom labels + metricID_UnwindGoCustomLabelsFailures, + + // number of failures to get TSD base for native custom labels + metricID_UnwindNativeCustomLabelsErrReadTsdBase, + + // number of failures to read native custom labels thread-local object + metricID_UnwindNativeCustomLabelsErrReadData, + + // number of failures to read native custom labels key buffer + metricID_UnwindNativeCustomLabelsErrReadKey, + + // number of failures to read native custom labels value buffer + metricID_UnwindNativeCustomLabelsErrReadValue, + + // number of successful reads of native custom labels + metricID_UnwindNativeCustomLabelsReadSuccesses, + + // total number of failures to add native custom labels + metricID_UnwindNativeCustomLabelsAddErrors, + + // total number of successes adding native custom labels + metricID_UnwindNativeCustomLabelsAddSuccesses, + // // Metric IDs above are for counters (cumulative values) // @@ -328,6 +355,7 @@ typedef enum TracePrograms { PROG_UNWIND_RUBY, PROG_UNWIND_V8, PROG_UNWIND_DOTNET, + PROG_GO_LABELS, NUM_TRACER_PROGS, } TracePrograms; @@ -524,6 +552,38 @@ typedef struct __attribute__((packed)) ApmCorrelationBuf { ApmSpanID transaction_id; } ApmCorrelationBuf; +#define CUSTOM_LABEL_MAX_KEY_LEN COMM_LEN +// Big enough to hold UUIDs, etc. 
+#define CUSTOM_LABEL_MAX_VAL_LEN 48 + +typedef struct CustomLabel { + char key[CUSTOM_LABEL_MAX_KEY_LEN]; + char val[CUSTOM_LABEL_MAX_VAL_LEN]; +} CustomLabel; + +typedef struct NativeCustomLabelsString { + size_t len; + const unsigned char *buf; +} NativeCustomLabelsString; + +typedef struct NativeCustomLabel { + NativeCustomLabelsString key; + NativeCustomLabelsString value; +} NativeCustomLabel; + +typedef struct NativeCustomLabelsThreadLocalData { + NativeCustomLabel *storage; + size_t count; + size_t capacity; +} NativeCustomLabelsSet; + +#define MAX_CUSTOM_LABELS 10 + +typedef struct CustomLabelsArray { + unsigned len; + CustomLabel labels[MAX_CUSTOM_LABELS]; +} CustomLabelsArray; + // Container for a stack trace typedef struct Trace { // The process ID @@ -540,6 +600,8 @@ typedef struct Trace { ApmSpanID apm_transaction_id; // APM trace ID or all-zero if not present. ApmTraceID apm_trace_id; + // Custom Labels + CustomLabelsArray custom_labels; // The kernel stack ID. s32 kernel_stack_id; // The number of frames in the stack. @@ -570,10 +632,10 @@ typedef struct UnwindState { #if defined(__x86_64__) // Current register values for named registers - u64 rax, r9, r11, r13, r15; + u64 rax, r9, r11, r13, r14, r15; #elif defined(__aarch64__) // Current register values for named registers - u64 lr, r22; + u64 lr, r7, r22, r28; #endif // The executable ID/hash associated with PC @@ -594,6 +656,9 @@ typedef struct UnwindState { // Consider calling unwinder_mark_nonleaf_frame rather than setting this directly. bool return_address; + // Make sure we only do this once. + bool processed_go_labels; + #if defined(__aarch64__) // On aarch64, whether to forbid LR-based unwinding. // LR unwinding is only allowed for leaf user-mode frames. 
Frames making a syscall @@ -681,6 +746,28 @@ typedef struct PythonUnwindScratchSpace { u8 code[192]; } PythonUnwindScratchSpace; +struct GoString { + char *str; + u64 len; +}; + +struct GoSlice { + void *array; + u64 len; + s64 cap; +}; + +typedef struct GoMapBucket { + char tophash[8]; + struct GoString keys[8]; + struct GoString values[8]; + void *overflow; +} GoMapBucket; + +typedef struct CustomLabelsState { + void *go_m_ptr; +} CustomLabelsState; + // Per-CPU info for the stack being built. This contains the stack as well as // meta-data on the number of eBPF tail-calls used so far to construct it. typedef struct PerCPURecord { @@ -696,6 +783,8 @@ typedef struct PerCPURecord { PHPUnwindState phpUnwindState; // The current Ruby unwinder state. RubyUnwindState rubyUnwindState; + // State for Go and Native custom labels + CustomLabelsState customLabelsState; union { // Scratch space for the Dotnet unwinder. DotnetUnwindScratchSpace dotnetUnwindScratch; @@ -705,6 +794,12 @@ typedef struct PerCPURecord { V8UnwindScratchSpace v8UnwindScratch; // Scratch space for the Python unwinder PythonUnwindScratchSpace pythonUnwindScratch; + // Native labels scratch space + NativeCustomLabel nativeCustomLabel; + // Go labels scratch + GoMapBucket goMapBucket; + // Scratch for Go 1.24 labels + struct GoString labels[2]; }; // Mask to indicate which unwinders are complete u32 unwindersDone; @@ -887,4 +982,17 @@ typedef struct ApmIntProcInfo { u64 tls_offset; } ApmIntProcInfo; -#endif +typedef struct NativeCustomLabelsProcInfo { + u64 tls_offset; +} NativeCustomLabelsProcInfo; + +typedef struct GoCustomLabelsOffsets { + u32 m_offset; + u32 curg; + u32 labels; + u32 hmap_count; + u32 hmap_log2_bucket_count; + u32 hmap_buckets; +} GoCustomLabelsOffsets; + +#endif // OPTI_TYPES_H diff --git a/support/ebpf/util.h b/support/ebpf/util.h new file mode 100644 index 000000000..c260544fd --- /dev/null +++ b/support/ebpf/util.h @@ -0,0 +1,7 @@ +#ifndef OPTI_UTIL_H +#define OPTI_UTIL_H + 
+#define MIN(a, b) (((a) < (b)) ? (a) : (b)) +#define MAX(a, b) (((a) > (b)) ? (a) : (b)) + +#endif diff --git a/support/types.go b/support/types.go index f24488009..b84602720 100644 --- a/support/types.go +++ b/support/types.go @@ -32,6 +32,7 @@ const ( ProgUnwindPerl = 0x3 ProgUnwindV8 = 0x7 ProgUnwindDotnet = 0x8 + ProgGoLabels = 0x9 ) const ( @@ -47,7 +48,7 @@ const ( const MaxFrameUnwinds = 0x80 const ( - MetricIDBeginCumulative = 0x60 + MetricIDBeginCumulative = 0x69 ) const ( diff --git a/support/types_def.go b/support/types_def.go index c094e0767..0e067dede 100644 --- a/support/types_def.go +++ b/support/types_def.go @@ -37,6 +37,7 @@ const ( ProgUnwindPerl = C.PROG_UNWIND_PERL ProgUnwindV8 = C.PROG_UNWIND_V8 ProgUnwindDotnet = C.PROG_UNWIND_DOTNET + ProgGoLabels = C.PROG_GO_LABELS ) const ( diff --git a/testutils/helpers.go b/testutils/helpers.go new file mode 100644 index 000000000..075a9154b --- /dev/null +++ b/testutils/helpers.go @@ -0,0 +1,127 @@ +package testutils // import "go.opentelemetry.io/ebpf-profiler/testutils" + +import ( + "bufio" + "context" + "errors" + "io" + "os" + "strings" + "testing" + "time" + + log "github.com/sirupsen/logrus" + "github.com/stretchr/testify/require" + "go.opentelemetry.io/ebpf-profiler/host" + "go.opentelemetry.io/ebpf-profiler/libpf" + "go.opentelemetry.io/ebpf-profiler/reporter" + "go.opentelemetry.io/ebpf-profiler/tracer" + tracertypes "go.opentelemetry.io/ebpf-profiler/tracer/types" +) + +type MockIntervals struct{} + +func (f MockIntervals) MonitorInterval() time.Duration { return 1 * time.Second } +func (f MockIntervals) TracePollInterval() time.Duration { return 250 * time.Millisecond } +func (f MockIntervals) PIDCleanupInterval() time.Duration { return 1 * time.Second } + +type MockReporter struct{} + +func (f MockReporter) ExecutableKnown(_ libpf.FileID) bool { + return true +} + +func (f MockReporter) ExecutableMetadata(_ *reporter.ExecutableMetadataArgs) { +} + +func (f MockReporter) FrameKnown(_ 
libpf.FrameID) bool { + return true +} + +func (f MockReporter) FrameMetadata(_ *reporter.FrameMetadataArgs) {} + +func StartTracer(ctx context.Context, t *testing.T, et tracertypes.IncludedTracers, + r reporter.SymbolReporter) (chan *host.Trace, *tracer.Tracer) { + trc, err := tracer.NewTracer(ctx, &tracer.Config{ + CollectCustomLabels: true, + Reporter: r, + Intervals: &MockIntervals{}, + IncludeTracers: et, + SamplesPerSecond: 20, + ProbabilisticInterval: 100, + ProbabilisticThreshold: 100, + }) + require.NoError(t, err) + + go readTracePipe(ctx) + + trc.StartPIDEventProcessor(ctx) + + err = trc.AttachTracer() + require.NoError(t, err) + + log.Info("Attached tracer program") + + err = trc.EnableProfiling() + require.NoError(t, err) + + err = trc.AttachSchedMonitor() + require.NoError(t, err) + + traceCh := make(chan *host.Trace) + + // Spawn monitors for the various result maps + err = trc.StartMapMonitors(ctx, traceCh) + require.NoError(t, err) + + return traceCh, trc +} + +func getTracePipe() (*os.File, error) { + for _, mnt := range []string{ + "/sys/kernel/debug/tracing", + "/sys/kernel/tracing", + "/tracing", + "/trace"} { + t, err := os.Open(mnt + "/trace_pipe") + if err == nil { + return t, nil + } + log.Errorf("Could not open trace_pipe at %s: %s", mnt, err) + } + return nil, os.ErrNotExist +} + +func readTracePipe(ctx context.Context) { + tp, err := getTracePipe() + if err != nil { + log.Warning("Could not open trace_pipe, check that debugfs is mounted") + return + } + + // When we're done kick ReadString out of blocked I/O. 
+ go func() { + <-ctx.Done() + tp.Close() + }() + + r := bufio.NewReader(tp) + for { + line, err := r.ReadString('\n') + if err != nil { + if errors.Is(err, io.EOF) { + continue + } + log.Error(err) + return + } + line = strings.TrimSpace(line) + if line != "" { + log.Infof("%s", line) + } + } +} + +func IsRoot() bool { + return os.Geteuid() == 0 +} diff --git a/tools/coredump/coredump.go b/tools/coredump/coredump.go index eff7b37c3..8457f7f77 100644 --- a/tools/coredump/coredump.go +++ b/tools/coredump/coredump.go @@ -189,7 +189,8 @@ func ExtractTraces(ctx context.Context, pr process.Process, debug bool, includeTracers, _ := tracertypes.Parse("all") manager, err := pm.New(todo, includeTracers, monitorInterval, &coredumpEbpfMaps, - pm.NewMapFileIDMapper(), symCache, elfunwindinfo.NewStackDeltaProvider(), false) + pm.NewMapFileIDMapper(), symCache, elfunwindinfo.NewStackDeltaProvider(), false, + libpf.Set[string]{}, true) if err != nil { return nil, fmt.Errorf("failed to get Interpreter manager: %v", err) } diff --git a/tools/coredump/ebpfcode.go b/tools/coredump/ebpfcode.go index 854ba2503..9a057d7a8 100644 --- a/tools/coredump/ebpfcode.go +++ b/tools/coredump/ebpfcode.go @@ -54,6 +54,7 @@ int bpf_log(const char *fmt, ...) 
#include "../../support/ebpf/ruby_tracer.ebpf.c" #include "../../support/ebpf/v8_tracer.ebpf.c" #include "../../support/ebpf/system_config.ebpf.c" +#include "../../support/ebpf/go_labels.ebpf.c" int unwind_traces(u64 id, int debug, u64 tp_base, void *ctx) { @@ -117,6 +118,9 @@ int bpf_tail_call(void *ctx, bpf_map_def *map, int index) case PROG_UNWIND_DOTNET: rc = unwind_dotnet(ctx); break; + case PROG_GO_LABELS: + rc = perf_go_labels(ctx); + break; default: return -1; } diff --git a/tracehandler/tracehandler.go b/tracehandler/tracehandler.go index 474ae9321..ad5883360 100644 --- a/tracehandler/tracehandler.go +++ b/tracehandler/tracehandler.go @@ -21,10 +21,6 @@ import ( "go.opentelemetry.io/ebpf-profiler/reporter" ) -// metadataWarnInhibDuration defines the minimum duration between warnings printed -// about failure to obtain metadata for a single PID. -const metadataWarnInhibDuration = 1 * time.Minute - // Compile time check to make sure config.Times satisfies the interfaces. var _ Times = (*times.Times)(nil) @@ -77,10 +73,6 @@ type traceHandler struct { // reporter instance to use to send out traces. reporter reporter.TraceReporter - // metadataWarnInhib tracks inhibitions for warnings printed about failure to - // update container metadata (rate-limiting). 
- metadataWarnInhib *lru.LRU[libpf.PID, libpf.Void] - times Times } @@ -99,19 +91,12 @@ func newTraceHandler(rep reporter.TraceReporter, traceProcessor TraceProcessor, return nil, err } - metadataWarnInhib, err := lru.New[libpf.PID, libpf.Void](64, libpf.PID.Hash32) - if err != nil { - return nil, fmt.Errorf("failed to create metadata warning inhibitor LRU: %v", err) - } - metadataWarnInhib.SetLifetime(metadataWarnInhibDuration) - t := &traceHandler{ - traceProcessor: traceProcessor, - bpfTraceCache: bpfTraceCache, - umTraceCache: umTraceCache, - reporter: rep, - times: intervals, - metadataWarnInhib: metadataWarnInhib, + traceProcessor: traceProcessor, + bpfTraceCache: bpfTraceCache, + umTraceCache: umTraceCache, + reporter: rep, + times: intervals, } return t, nil @@ -129,6 +114,7 @@ func (m *traceHandler) HandleTrace(bpfTrace *host.Trace) { ExecutablePath: bpfTrace.ExecutablePath, Origin: bpfTrace.Origin, OffTime: bpfTrace.OffTime, + EnvVars: bpfTrace.EnvVars, } if !m.reporter.SupportsReportTraceEvent() { diff --git a/tracer/ebpf_integration_test.go b/tracer/ebpf_integration_test.go index 124c1679b..809c260f9 100644 --- a/tracer/ebpf_integration_test.go +++ b/tracer/ebpf_integration_test.go @@ -3,7 +3,7 @@ // Copyright The OpenTelemetry Authors // SPDX-License-Identifier: Apache-2.0 -package tracer +package tracer_test import ( "context" @@ -20,10 +20,10 @@ import ( "go.opentelemetry.io/ebpf-profiler/host" "go.opentelemetry.io/ebpf-profiler/libpf" - "go.opentelemetry.io/ebpf-profiler/proc" - "go.opentelemetry.io/ebpf-profiler/reporter" "go.opentelemetry.io/ebpf-profiler/rlimit" "go.opentelemetry.io/ebpf-profiler/support" + "go.opentelemetry.io/ebpf-profiler/testutils" + "go.opentelemetry.io/ebpf-profiler/tracer" tracertypes "go.opentelemetry.io/ebpf-profiler/tracer/types" ) @@ -45,11 +45,12 @@ func forceContextSwitch() { // runKernelFrameProbe executes a perf event on the sched/sched_switch tracepoint // that sends a selection of hand-crafted, predictable 
traces. -func runKernelFrameProbe(t *testing.T, tracer *Tracer) { +func runKernelFrameProbe(t *testing.T, tr *tracer.Tracer) { coll, err := support.LoadCollectionSpec(false) require.NoError(t, err) - err = coll.RewriteMaps(tracer.ebpfMaps) //nolint:staticcheck + //nolint:staticcheck + err = coll.RewriteMaps(tr.GetEbpfMaps()) require.NoError(t, err) restoreRlimit, err := rlimit.MaximizeMemlock() @@ -85,29 +86,6 @@ func validateTrace(t *testing.T, numKernelFrames int, expected, returned *host.T } } -type mockIntervals struct{} - -func (f mockIntervals) MonitorInterval() time.Duration { return 1 * time.Second } -func (f mockIntervals) TracePollInterval() time.Duration { return 250 * time.Millisecond } -func (f mockIntervals) PIDCleanupInterval() time.Duration { return 1 * time.Second } - -type mockReporter struct{} - -func (f mockReporter) ExecutableKnown(_ libpf.FileID) bool { - return true -} - -func (f mockReporter) ExecutableMetadata(_ *reporter.ExecutableMetadataArgs) { -} - -func (f mockReporter) ReportFallbackSymbol(_ libpf.FrameID, _ string) {} - -func (f mockReporter) FrameKnown(_ libpf.FrameID) bool { - return true -} - -func (f mockReporter) FrameMetadata(_ *reporter.FrameMetadataArgs) {} - func generateMaxLengthTrace() host.Trace { var trace host.Trace for i := 0; i < support.MaxFrameUnwinds; i++ { @@ -125,9 +103,9 @@ func TestTraceTransmissionAndParsing(t *testing.T) { enabledTracers, _ := tracertypes.Parse("") enabledTracers.Enable(tracertypes.PythonTracer) - tracer, err := NewTracer(ctx, &Config{ - Reporter: &mockReporter{}, - Intervals: &mockIntervals{}, + tr, err := tracer.NewTracer(ctx, &tracer.Config{ + Reporter: &testutils.MockReporter{}, + Intervals: &testutils.MockIntervals{}, IncludeTracers: enabledTracers, FilterErrorFrames: false, SamplesPerSecond: 20, @@ -137,14 +115,15 @@ func TestTraceTransmissionAndParsing(t *testing.T) { ProbabilisticInterval: 100, ProbabilisticThreshold: 100, OffCPUThreshold: support.OffCPUThresholdMax, + DebugTracer: 
true, }) require.NoError(t, err) traceChan := make(chan *host.Trace, 16) - err = tracer.StartMapMonitors(ctx, traceChan) + err = tr.StartMapMonitors(ctx, traceChan) require.NoError(t, err) - runKernelFrameProbe(t, tracer) + runKernelFrameProbe(t, tr) traces := make(map[uint8]*host.Trace) timeout := time.NewTimer(1 * time.Second) @@ -253,17 +232,6 @@ Loop: } func TestAllTracers(t *testing.T) { - kernelSymbols, err := proc.GetKallsyms("/proc/kallsyms") - require.NoError(t, err) - - _, _, err = initializeMapsAndPrograms(kernelSymbols, &Config{ - IncludeTracers: tracertypes.AllTracers(), - MapScaleFactor: 1, - FilterErrorFrames: false, - KernelVersionCheck: false, - DebugTracer: false, - BPFVerifierLogLevel: 0, - OffCPUThreshold: 10, - }) - require.NoError(t, err) + _, _ = testutils.StartTracer(context.Background(), t, tracertypes.AllTracers(), + &testutils.MockReporter{}) } diff --git a/tracer/systemconfig.go b/tracer/systemconfig.go index b8dc3ad48..75ba5d177 100644 --- a/tracer/systemconfig.go +++ b/tracer/systemconfig.go @@ -247,7 +247,8 @@ func loadSystemConfig(coll *cebpf.CollectionSpec, maps map[string]*cebpf.Map, return err } - if includeTracers.Has(types.PerlTracer) || includeTracers.Has(types.PythonTracer) { + if includeTracers.Has(types.PerlTracer) || includeTracers.Has(types.PythonTracer) || + includeTracers.Has(types.GoLabels) { var tpbaseOffset uint64 tpbaseOffset, err = loadTPBaseOffset(coll, maps, kernelSymbols) if err != nil { diff --git a/tracer/tracer.go b/tracer/tracer.go index b48aa8177..a47ef9aa7 100644 --- a/tracer/tracer.go +++ b/tracer/tracer.go @@ -148,6 +148,9 @@ type Config struct { KernelVersionCheck bool // DebugTracer indicates whether to load the debug version of eBPF tracers. DebugTracer bool + // CollectCustomLabels determines whether to collect custom labels in + // languages that support them. + CollectCustomLabels bool // BPFVerifierLogLevel is the log level of the eBPF verifier output. 
BPFVerifierLogLevel uint32 // ProbabilisticInterval is the time interval for which probabilistic profiling will be enabled. @@ -156,6 +159,9 @@ type Config struct { ProbabilisticThreshold uint // OffCPUThreshold is the user defined threshold for off-cpu profiling. OffCPUThreshold uint32 + // IncludeEnvVars holds a list of environment variables that should be captured and reported + // from processes + IncludeEnvVars libpf.Set[string] } // hookPoint specifies the group and name of the hooked point in the kernel. @@ -296,7 +302,7 @@ func NewTracer(ctx context.Context, cfg *Config) (*Tracer, error) { processManager, err := pm.New(ctx, cfg.IncludeTracers, cfg.Intervals.MonitorInterval(), ebpfHandler, nil, cfg.Reporter, elfunwindinfo.NewStackDeltaProvider(), - cfg.FilterErrorFrames) + cfg.FilterErrorFrames, cfg.IncludeEnvVars, cfg.CollectCustomLabels) if err != nil { return nil, fmt.Errorf("failed to create processManager: %v", err) } @@ -483,6 +489,11 @@ func initializeMapsAndPrograms(kernelSymbols *libpf.SymbolMap, cfg *Config) ( name: "unwind_dotnet", enable: cfg.IncludeTracers.Has(types.DotnetTracer), }, + { + progID: uint32(support.ProgGoLabels), + name: "go_labels", + enable: cfg.IncludeTracers.Has(types.GoLabels), + }, } if err = loadPerfUnwinders(coll, ebpfProgs, ebpfMaps["perf_progs"], tailCallProgs, @@ -991,6 +1002,7 @@ func (t *Tracer) loadBpfTrace(raw []byte, cpu int) *host.Trace { OffTime: int64(ptr.offtime), KTime: times.KTime(ptr.ktime), CPU: cpu, + EnvVars: procMeta.EnvVariables, } if trace.Origin != support.TraceOriginSampling && trace.Origin != support.TraceOriginOffCPU { @@ -1022,6 +1034,16 @@ func (t *Tracer) loadBpfTrace(raw []byte, cpu int) *host.Trace { } } + if ptr.custom_labels.len > 0 { + trace.CustomLabels = make(map[string]string, int(ptr.custom_labels.len)) + for i := 0; i < int(ptr.custom_labels.len); i++ { + lbl := ptr.custom_labels.labels[i] + key := C.GoString((*C.char)(unsafe.Pointer(&lbl.key))) + val := 
C.GoString((*C.char)(unsafe.Pointer(&lbl.val))) + trace.CustomLabels[key] = val + } + } + // If there are no kernel frames, or reading them failed, we are responsible // for allocating the columnar frame array. if len(trace.Frames) == 0 { @@ -1181,6 +1203,11 @@ func (t *Tracer) StartMapMonitors(ctx context.Context, traceOutChan chan<- *host return nil } +// Testing hook +func (t *Tracer) GetEbpfMaps() map[string]*cebpf.Map { + return t.ebpfMaps +} + // AttachTracer attaches the main tracer entry point to the perf interrupt events. The tracer // entry point is always the native tracer. The native tracer will determine when to invoke the // interpreter tracers based on address range information. diff --git a/tracer/types/parse.go b/tracer/types/parse.go index eee3a8dff..ec02cd502 100644 --- a/tracer/types/parse.go +++ b/tracer/types/parse.go @@ -22,6 +22,7 @@ const ( RubyTracer V8Tracer DotnetTracer + GoLabels // maxTracers indicates the max. number of different tracers maxTracers @@ -35,6 +36,7 @@ var tracerTypeToName = map[tracerType]string{ RubyTracer: "ruby", V8Tracer: "v8", DotnetTracer: "dotnet", + GoLabels: "go-labels", } var tracerNameToType = make(map[string]tracerType, maxTracers)