diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index a20c589e0..8cc89cb3d 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -11,7 +11,7 @@ on: jobs: analyze: name: Analyze Go (${{ matrix.target_arch }}) - if: ${{ github.actor != 'dependabot[bot]' }} + if: ${{ github.actor != 'dependabot[bot]' && github.repository == 'open-telemetry/opentelemetry-ebpf-profiler' }} runs-on: ubuntu-24.04 strategy: matrix: diff --git a/.github/workflows/env/action.yml b/.github/workflows/env/action.yml index 9ca510ea2..3b15d1408 100644 --- a/.github/workflows/env/action.yml +++ b/.github/workflows/env/action.yml @@ -30,6 +30,12 @@ runs: sudo apt-get install -y curl unzip gcc-aarch64-linux-gnu \ libc6-arm64-cross qemu-user-binfmt libc6:arm64 \ musl-dev:amd64 musl-dev:arm64 musl-tools binutils-aarch64-linux-gnu + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version-file: go.mod + cache-dependency-path: go.sum + id: go - name: Install Rust uses: dtolnay/rust-toolchain@stable with: diff --git a/.github/workflows/fossa.yml b/.github/workflows/fossa.yml index 3beaa09b7..00d9703d5 100644 --- a/.github/workflows/fossa.yml +++ b/.github/workflows/fossa.yml @@ -11,6 +11,7 @@ permissions: jobs: fossa: runs-on: ubuntu-latest + if: github.repository == 'open-telemetry/opentelemetry-ebpf-profiler' steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 diff --git a/.github/workflows/ossf-scorecard.yml b/.github/workflows/ossf-scorecard.yml new file mode 100644 index 000000000..c505340e6 --- /dev/null +++ b/.github/workflows/ossf-scorecard.yml @@ -0,0 +1,47 @@ +name: OSSF Scorecard + +on: + push: + branches: + - main + schedule: + - cron: "31 6 * * 4" # once a week + workflow_dispatch: + +permissions: read-all + +jobs: + analysis: + runs-on: ubuntu-latest + permissions: + # Needed for Code scanning upload + security-events: write + # Needed for GitHub OIDC token if publish_results is true + id-token: write 
+ steps: + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + persist-credentials: false + + - uses: ossf/scorecard-action@f49aabe0b5af0936a0987cfb85d86b75731b0186 # v2.4.1 + with: + results_file: results.sarif + results_format: sarif + publish_results: true + + # Upload the results as artifacts (optional). Commenting out will disable + # uploads of run results in SARIF format to the repository Actions tab. + # https://docs.github.com/en/actions/advanced-guides/storing-workflow-data-as-artifacts + - name: "Upload artifact" + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 + with: + name: SARIF file + path: results.sarif + retention-days: 5 + + # Upload the results to GitHub's code scanning dashboard (optional). + # Commenting out will disable upload of results to your repo's Code Scanning dashboard + - name: "Upload to code-scanning" + uses: github/codeql-action/upload-sarif@5f8171a638ada777af81d42b55959a643bb29017 # v3.28.12 + with: + sarif_file: results.sarif diff --git a/.github/workflows/push-docker-image.yml b/.github/workflows/push-docker-image.yml index 2e47aca8d..6a3aa750a 100644 --- a/.github/workflows/push-docker-image.yml +++ b/.github/workflows/push-docker-image.yml @@ -9,6 +9,7 @@ on: jobs: build-and-push: runs-on: ubuntu-latest + if: github.repository == 'open-telemetry/opentelemetry-ebpf-profiler' steps: - name: Checkout code uses: actions/checkout@v4 diff --git a/.github/workflows/unit-test-on-pull-request.yml b/.github/workflows/unit-test-on-pull-request.yml index 45aeb5f11..eefe66f76 100644 --- a/.github/workflows/unit-test-on-pull-request.yml +++ b/.github/workflows/unit-test-on-pull-request.yml @@ -15,13 +15,6 @@ jobs: uses: actions/checkout@v4 - name: Set up environment uses: ./.github/workflows/env - - name: Set up Go - uses: actions/setup-go@v5 - with: - go-version-file: go.mod - check-latest: true - cache-dependency-path: go.sum - id: go - name: Check for changes in licenses 
of dependencies run: | make legal @@ -41,13 +34,6 @@ jobs: uses: actions/checkout@v4 - name: Set up environment uses: ./.github/workflows/env - - name: Set up Go - uses: actions/setup-go@v5 - with: - go-version-file: go.mod - check-latest: true - cache-dependency-path: go.sum - id: go - name: Get linter version id: linter-version run: (echo -n "version="; make linter-version) >> "$GITHUB_OUTPUT" @@ -75,13 +61,6 @@ jobs: uses: actions/checkout@v4 - name: Set up environment uses: ./.github/workflows/env - - name: Set up Go - uses: actions/setup-go@v5 - with: - go-version-file: go.mod - check-latest: true - cache-dependency-path: go.sum - id: go - name: Cache coredump modules uses: actions/cache@v4 with: @@ -95,45 +74,46 @@ jobs: - name: Tests run: make test TARGET_ARCH=${{ matrix.target_arch }} - check-ebpf-blobs: - name: Check for differences in the eBPF binary blobs + check-binary-blobs: + name: Check for differences in the eBPF and Rust binary blobs runs-on: ubuntu-24.04 container: otel/opentelemetry-ebpf-profiler-dev:latest + defaults: + run: + shell: bash --login {0} steps: - name: Clone code uses: actions/checkout@v4 - - name: Hash eBPF blobs + - name: Hash binary blobs run: | - sha256sum support/ebpf/tracer.ebpf.release.* > ebpf-blobs.hash + sha256sum support/ebpf/tracer.ebpf.release.* > binary-blobs.hash + sha256sum target/x86_64-unknown-linux-musl/release/libsymblib_capi.a >> binary-blobs.hash + sha256sum target/aarch64-unknown-linux-musl/release/libsymblib_capi.a >> binary-blobs.hash - name: Rebuild eBPF blobs run: | rm support/ebpf/tracer.ebpf.release.* make amd64 -C support/ebpf make arm64 -C support/ebpf + - name: Rebuild Rust blobs + run: | + rm -rf target/ + make rust-components TARGET_ARCH=amd64 + make rust-components TARGET_ARCH=arm64 - name: Check for differences run: | - if ! sha256sum --check ebpf-blobs.hash; then - echo "Please rebuild and commit the updated eBPF binary blobs." + if ! 
sha256sum --check binary-blobs.hash; then + echo "Please rebuild and commit the updated binary blobs." exit 1 fi - build-integration-test-binaries: - name: Build integration test binaries (${{ matrix.target_arch }}-${{ matrix.go_version}}) + name: Build integration test binaries (${{ matrix.target_arch }}) runs-on: ubuntu-24.04 timeout-minutes: 10 strategy: matrix: target_arch: [amd64, arm64] - go_version: [1.23, 1.24] steps: - - name: Set up Go - uses: actions/setup-go@v5 - with: - go-version: ${{ matrix.go_version }} - check-latest: true - cache-dependency-path: go.sum - id: go - name: Clone code uses: actions/checkout@v4 - name: Set up environment @@ -143,17 +123,16 @@ jobs: - name: Upload integration test binaries uses: actions/upload-artifact@v4 with: - name: integration-test-binaries-${{ matrix.target_arch }}-${{ matrix.go_version}} + name: integration-test-binaries-${{ matrix.target_arch }} path: support/*.test integration-tests: - name: Integration tests (v${{ matrix.kernel }} ${{ matrix.target_arch }} ${{ matrix.go_version}}) + name: Integration tests (v${{ matrix.kernel }} ${{ matrix.target_arch }}) runs-on: ubuntu-24.04 needs: build-integration-test-binaries timeout-minutes: 10 strategy: matrix: - go_version: [1.23, 1.24] include: # List of available kernels here: # https://github.com/cilium/ci-kernels/pkgs/container/ci-kernels/versions?filters%5Bversion_type%5D=tagged @@ -177,13 +156,6 @@ jobs: steps: - name: Clone code uses: actions/checkout@v4 - - name: Set up Go - uses: actions/setup-go@v5 - with: - go-version: ${{ matrix.go_version }} - check-latest: true - cache-dependency-path: go.sum - id: go - name: Install dependencies run: | sudo apt-get update -y @@ -196,7 +168,7 @@ jobs: sudo mv ~/go/bin/bluebox /usr/local/bin/. 
- name: Fetch integration test binaries uses: actions/download-artifact@v4 - with: { name: "integration-test-binaries-${{ matrix.target_arch }}-${{ matrix.go_version}}" } + with: { name: "integration-test-binaries-${{ matrix.target_arch }}" } - name: Fetch precompiled kernel run: | install -d ci-kernels diff --git a/.gitignore b/.gitignore index a67cb4472..f2cfd7c8c 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,8 @@ /go ebpf-profiler ci-kernels -/target +# Ignore target directory +target/* +# But not these specific paths +!target/x86_64-unknown-linux-musl/release/libsymblib_capi.a +!target/aarch64-unknown-linux-musl/release/libsymblib_capi.a diff --git a/.golangci.yml b/.golangci.yml index a42466039..ec90a4a9e 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -91,8 +91,6 @@ linters: - wastedassign - wsl - wrapcheck - # the following linters are deprecated - - exportloopref # we don't want to change code to Go 1.22+ yet - intrange - copyloopvar diff --git a/Dockerfile b/Dockerfile index 4a24eb43b..5c2f0562e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,6 +2,8 @@ FROM debian:testing-20241223-slim WORKDIR /agent +RUN dpkg --add-architecture amd64 && dpkg --add-architecture arm64 + # cross_debian_arch: amd64 or arm64 # cross_pkg_arch: x86-64 or aarch64 RUN cross_debian_arch=$(uname -m | sed -e 's/aarch64/amd64/' -e 's/x86_64/arm64/'); \ @@ -9,7 +11,8 @@ RUN cross_debian_arch=$(uname -m | sed -e 's/aarch64/amd64/' -e 's/x86_64/arm64 apt-get update -y && \ apt-get dist-upgrade -y && \ apt-get install -y curl wget make git cmake clang-17 unzip libc6-dev g++ gcc pkgconf \ - gcc-${cross_pkg_arch}-linux-gnu libc6-${cross_debian_arch}-cross musl-dev && \ + gcc-${cross_pkg_arch}-linux-gnu libc6-${cross_debian_arch}-cross \ + musl-dev:amd64 musl-dev:arm64 && \ apt-get clean autoclean && \ apt-get autoremove --yes diff --git a/LICENSES/go.opentelemetry.io/auto/sdk/LICENSE b/LICENSES/go.opentelemetry.io/auto/sdk/LICENSE new file mode 100644 index 000000000..261eeb9e9 --- 
/dev/null +++ b/LICENSES/go.opentelemetry.io/auto/sdk/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative 
Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/Makefile b/Makefile index c3981968b..0a998b1bb 100644 --- a/Makefile +++ b/Makefile @@ -86,9 +86,9 @@ endif rust-components: rust-targets ifeq ($(TARGET_ARCH),arm64) - cargo build --lib --release --target aarch64-unknown-linux-musl + RUSTFLAGS="--remap-path-prefix $(PWD)=/" cargo build --lib --release --target aarch64-unknown-linux-musl else ifeq ($(TARGET_ARCH),amd64) - cargo build --lib --release --target x86_64-unknown-linux-musl + RUSTFLAGS="--remap-path-prefix $(PWD)=/" cargo build --lib --release --target x86_64-unknown-linux-musl endif rust-tests: rust-targets @@ -117,6 +117,9 @@ vanity-import-fix: $(PORTO) test: generate ebpf test-deps go test $(GO_FLAGS) -tags $(GO_TAGS) ./... 
+sudo-test: integration-test-binaries + (cd support && sudo ./go_labels.test -test.v) + TESTDATA_DIRS:= \ nativeunwind/elfunwindinfo/testdata \ libpf/pfelf/testdata \ @@ -129,9 +132,14 @@ test-deps: TEST_INTEGRATION_BINARY_DIRS := tracer processmanager/ebpf support go_labels -integration-test-binaries: generate ebpf -# Call it a ".test" even though it isn't to get included into bluebox initramfs - go build -o ./support/go_labels_canary.test ./go_labels +# These binaries are named ".test" to get included into bluebox initramfs +support/go_labels_canary1.23.test: ./go_labels/*.go + GOTOOLCHAIN=go1.23.7 go build -tags $(GO_TAGS) -o $@ ./go_labels + +support/go_labels_canary1.24.test: ./go_labels/*.go + GOTOOLCHAIN=go1.24.1 go build -tags $(GO_TAGS) -o $@ ./go_labels + +integration-test-binaries: generate ebpf support/go_labels_canary1.23.test support/go_labels_canary1.24.test $(foreach test_name, $(TEST_INTEGRATION_BINARY_DIRS), \ (go test -ldflags='-extldflags=-static' -trimpath -c \ -tags $(GO_TAGS),static_build,integration \ diff --git a/README.md b/README.md index faea58ea4..03499ec90 100644 --- a/README.md +++ b/README.md @@ -31,8 +31,6 @@ eBPF. optimizations and offer a higher precision of function call chains. ## Building -## Quick Start -If you'd like to quickly test the agent, you can skip to the ["Visualizing data locally"](https://github.com/open-telemetry/opentelemetry-ebpf-profiler?tab=readme-ov-file#visualizing-data-locally) section and launch devfiler. From there, follow the download links for prebuilt agent binaries. ## Platform Requirements The agent can be built with the provided make targets. Docker is required for containerized builds, and both amd64 and arm64 architectures are supported. @@ -77,61 +75,6 @@ of the recently released OTel profiling [signal](https://github.com/open-telemet The agent loads the eBPF program and its maps, starts unwinding and reports captured traces to the backend. 
-## Visualizing data locally - -We created a desktop application called "devfiler" that allows visualizing the -profiling agent's output locally, making it very convenient for development use. -devfiler spins up a local server that listens on `0.0.0.0:11000`. - -![Screenshot of devfiler UI](./doc/devfiler.png) - -To run it, simply download and unpack the archive from the following URL: - -https://upload.elastic.co/d/05d06f7a9a1898bc39e90b74647a1fb3ae19db7497d88e22aea27f47b8565dde - -Authentication token: `abe24ec533283049` - - -The archive contains a build for each of the following platforms: - -- macOS (Intel) -- macOS (Apple Silicon) -- Linux AppImage (x86_64) -- Linux AppImage (aarch64) - -> [!IMPORTANT] -> -> The macOS application isn't properly signed with an Apple developer certificate: macOS will -> complain about the application being corrupted on start. To work around that, simply run the following -> command after downloading the archive: -> -> ``` -> xattr -d com.apple.quarantine ~/Downloads/devfiler.app.zip -> ``` -> -> If you did this correctly, the application should run just fine after unpacking the ZIP. - -> [!NOTE] -> devfiler is currently in an experimental preview stage. - -### macOS - -This build of devfiler is currently not signed with a globally trusted Apple -developer ID, but with a developer certificate. If you simply double-click the -application, you'll run into an error. Instead of opening it with a double -click, simply do a **right-click** on `devfiler.app`, then choose "Open". If you -go this route, you'll instead be presented with the option to run it anyway. - -### Linux - -The AppImages in the archive should run on any Linux distribution with a -reasonably modern glibc and libgl installation. 
To run the application, simply -extract the archive and then do: - -```console -./devfiler-appimage-$(uname -m).AppImage -``` - ## Agent internals The host agent is a Go application that is deployed to all machines customers diff --git a/cli_flags.go b/cli_flags.go index 6ace06145..fbc770127 100644 --- a/cli_flags.go +++ b/cli_flags.go @@ -26,6 +26,7 @@ const ( defaultProbabilisticInterval = 1 * time.Minute defaultArgSendErrorFrames = false defaultOffCPUThreshold = 0 + defaultEnvVarsValue = "" // This is the X in 2^(n + x) where n is the default hardcoded map size value defaultArgMapScaleFactor = 0 @@ -67,6 +68,8 @@ var ( "Valid values are in the range [1..%d], and 0 to disable off-cpu profiling."+ "Default is %d.", support.OffCPUThresholdMax, defaultOffCPUThreshold) + envVarsHelp = "Comma separated list of environment variables that will be reported with the" + + "captured profiling samples." ) // Package-scope variable, so that conditionally compiled other components can refer @@ -123,6 +126,8 @@ func parseArgs() (*controller.Config, error) { fs.UintVar(&args.OffCPUThreshold, "off-cpu-threshold", defaultOffCPUThreshold, offCPUThresholdHelp) + fs.StringVar(&args.IncludeEnvVars, "env-vars", defaultEnvVarsValue, envVarsHelp) + fs.Usage = func() { fs.PrintDefaults() } diff --git a/collector/internal/controller.go b/collector/internal/controller.go index d4e1e1d67..17662a552 100644 --- a/collector/internal/controller.go +++ b/collector/internal/controller.go @@ -22,8 +22,8 @@ type Controller struct { func NewController(cfg *controller.Config, nextConsumer xconsumer.Profiles) (*Controller, error) { - intervals := times.New(cfg.MonitorInterval, - cfg.ReporterInterval, cfg.ProbabilisticInterval) + intervals := times.New(cfg.ReporterInterval, + cfg.MonitorInterval, cfg.ProbabilisticInterval) rep, err := reporter.NewCollector(&reporter.Config{ MaxRPCMsgSize: 32 << 20, // 32 MiB diff --git a/doc/devfiler.png b/doc/devfiler.png deleted file mode 100644 index 
e91a30e86..000000000 Binary files a/doc/devfiler.png and /dev/null differ diff --git a/go.mod b/go.mod index 5925be2ef..3cc001c79 100644 --- a/go.mod +++ b/go.mod @@ -29,12 +29,12 @@ require ( go.opentelemetry.io/collector/receiver v0.116.0 go.opentelemetry.io/collector/receiver/receivertest v0.116.0 go.opentelemetry.io/collector/receiver/xreceiver v0.116.0 - go.opentelemetry.io/otel v1.32.0 - go.opentelemetry.io/otel/metric v1.32.0 + go.opentelemetry.io/otel v1.35.0 + go.opentelemetry.io/otel/metric v1.35.0 golang.org/x/arch v0.10.0 golang.org/x/exp v0.0.0-20240909161429-701f63a606c0 - golang.org/x/sync v0.10.0 - golang.org/x/sys v0.29.0 + golang.org/x/sync v0.12.0 + golang.org/x/sys v0.31.0 google.golang.org/grpc v1.69.2 ) @@ -71,7 +71,7 @@ require ( github.com/go-logr/stdr v1.2.2 // indirect github.com/go-ole/go-ole v1.2.6 // indirect github.com/gogo/protobuf v1.3.2 // indirect - github.com/google/go-cmp v0.6.0 // indirect + github.com/google/go-cmp v0.7.0 // indirect github.com/josharian/native v1.1.0 // indirect github.com/json-iterator/go v1.1.12 // indirect github.com/klauspost/cpuid/v2 v2.2.8 // indirect @@ -96,6 +96,7 @@ require ( github.com/shoenig/go-m1cpu v0.1.6 // indirect github.com/tklauser/go-sysconf v0.3.12 // indirect github.com/yusufpapurcu/wmi v1.2.3 // indirect + go.opentelemetry.io/auto/sdk v1.1.0 // indirect go.opentelemetry.io/collector/component/componenttest v0.116.0 // indirect go.opentelemetry.io/collector/config/configtelemetry v0.116.0 // indirect go.opentelemetry.io/collector/consumer v1.22.0 // indirect @@ -104,12 +105,12 @@ require ( go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0 // indirect go.opentelemetry.io/otel/sdk v1.32.0 // indirect go.opentelemetry.io/otel/sdk/metric v1.32.0 // indirect - go.opentelemetry.io/otel/trace v1.32.0 // indirect + go.opentelemetry.io/otel/trace v1.35.0 // indirect go.uber.org/multierr v1.11.0 // indirect go.uber.org/zap v1.27.0 // indirect - golang.org/x/crypto v0.31.0 // 
indirect - golang.org/x/net v0.33.0 // indirect - golang.org/x/text v0.21.0 // indirect + golang.org/x/crypto v0.36.0 // indirect + golang.org/x/net v0.37.0 // indirect + golang.org/x/text v0.23.0 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20250102185135-69823020774d // indirect google.golang.org/protobuf v1.36.1 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect diff --git a/go.sum b/go.sum index 9629403f2..c81280dc6 100644 --- a/go.sum +++ b/go.sum @@ -90,8 +90,9 @@ github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= -github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= @@ -154,8 +155,8 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c h1:ncq/mPwQF4JjgDlrVEn3C11VoGHZN7m8qihwgMEtzYw= github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE= -github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M= -github.com/rogpeppe/go-internal v1.11.0/go.mod 
h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA= +github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= +github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= github.com/shirou/gopsutil/v3 v3.23.12 h1:z90NtUkp3bMtmICZKpC4+WaknU1eXtp5vtbQ11DgpE4= github.com/shirou/gopsutil/v3 v3.23.12/go.mod h1:1FrWgea594Jp7qmjHUUPlJDTPgcsb9mGnXDxavtikzM= github.com/shoenig/go-m1cpu v0.1.6 h1:nxdKQNcEB6vzgA2E2bvzKIYRuNj7XNJ4S/aRSwKzFtM= @@ -191,6 +192,8 @@ github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ= github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0= github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA= +go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= +go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= go.opentelemetry.io/collector/component v0.116.0 h1:SQE1YeVfYCN7bw1n4hknUwJE5U/1qJL552sDhAdSlaA= go.opentelemetry.io/collector/component v0.116.0/go.mod h1:MYgXFZWDTq0uPgF1mkLSFibtpNqksRVAOrmihckOQEs= go.opentelemetry.io/collector/component/componenttest v0.116.0 h1:UIcnx4Rrs/oDRYSAZNHRMUiYs2FBlwgV5Nc0oMYfR6A= @@ -221,20 +224,20 @@ go.opentelemetry.io/collector/receiver/xreceiver v0.116.0 h1:Kc+ixqgMjU2sHhzNrFn go.opentelemetry.io/collector/receiver/xreceiver v0.116.0/go.mod h1:H2YGSNFoMbWMIDvB8tzkReHSVqvogihjtet+ppHfYv8= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0 h1:jq9TW8u3so/bN+JPT166wjOI6/vQPF6Xe7nMNIltagk= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0/go.mod h1:p8pYQP+m5XfbZm9fxtSKAbM6oIllS7s2AfxrChvc7iw= -go.opentelemetry.io/otel v1.32.0 h1:WnBN+Xjcteh0zdk01SVqV55d/m62NJLJdIyb4y/WO5U= -go.opentelemetry.io/otel v1.32.0/go.mod h1:00DCVSB0RQcnzlwyTfqtxSm+DRr9hpYrHjNGiBHVQIg= +go.opentelemetry.io/otel v1.35.0 
h1:xKWKPxrxB6OtMCbmMY021CqC45J+3Onta9MqjhnusiQ= +go.opentelemetry.io/otel v1.35.0/go.mod h1:UEqy8Zp11hpkUrL73gSlELM0DupHoiq72dR+Zqel/+Y= go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.19.0 h1:Mne5On7VWdx7omSrSSZvM4Kw7cS7NQkOOmLcgscI51U= go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.19.0/go.mod h1:IPtUMKL4O3tH5y+iXVyAXqpAwMuzC1IrxVS81rummfE= go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.19.0 h1:IeMeyr1aBvBiPVYihXIaeIZba6b8E1bYp7lbdxK8CQg= go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.19.0/go.mod h1:oVdCUtjq9MK9BlS7TtucsQwUcXcymNiEDjgDD2jMtZU= -go.opentelemetry.io/otel/metric v1.32.0 h1:xV2umtmNcThh2/a/aCP+h64Xx5wsj8qqnkYZktzNa0M= -go.opentelemetry.io/otel/metric v1.32.0/go.mod h1:jH7CIbbK6SH2V2wE16W05BHCtIDzauciCRLoc/SyMv8= +go.opentelemetry.io/otel/metric v1.35.0 h1:0znxYu2SNyuMSQT4Y9WDWej0VpcsxkuklLa4/siN90M= +go.opentelemetry.io/otel/metric v1.35.0/go.mod h1:nKVFgxBZ2fReX6IlyW28MgZojkoAkJGaE8CpgeAU3oE= go.opentelemetry.io/otel/sdk v1.32.0 h1:RNxepc9vK59A8XsgZQouW8ue8Gkb4jpWtJm9ge5lEG4= go.opentelemetry.io/otel/sdk v1.32.0/go.mod h1:LqgegDBjKMmb2GC6/PrTnteJG39I8/vJCAP9LlJXEjU= go.opentelemetry.io/otel/sdk/metric v1.32.0 h1:rZvFnvmvawYb0alrYkjraqJq0Z4ZUJAiyYCU9snn1CU= go.opentelemetry.io/otel/sdk/metric v1.32.0/go.mod h1:PWeZlq0zt9YkYAp3gjKZ0eicRYvOh1Gd+X99x6GHpCQ= -go.opentelemetry.io/otel/trace v1.32.0 h1:WIC9mYrXf8TmY/EXuULKc8hR17vE+Hjv2cssQDe03fM= -go.opentelemetry.io/otel/trace v1.32.0/go.mod h1:+i4rkvCraA+tG6AzwloGaCtkx53Fa+L+V8e9a7YvhT8= +go.opentelemetry.io/otel/trace v1.35.0 h1:dPpEfJu1sDIqruz7BHFG3c7528f6ddfSWfFDVt/xgMs= +go.opentelemetry.io/otel/trace v1.35.0/go.mod h1:WUk7DtFp1Aw2MkvqGdwiXYDZZNvA/1J8o6xRXLrIkyc= go.opentelemetry.io/proto/otlp v1.0.0 h1:T0TX0tmXU8a3CbNXzEKGeU5mIVOdf0oykP+u2lIVU/I= go.opentelemetry.io/proto/otlp v1.0.0/go.mod h1:Sy6pihPLfYHkr3NkUbEhGHFhINUSI/v80hjKIs5JXpM= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= @@ -248,8 +251,8 @@ golang.org/x/arch 
v0.10.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.31.0 h1:ihbySMvVjLAeSH1IbfcRTkD/iNscyz8rGzjF/E5hV6U= -golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk= +golang.org/x/crypto v0.36.0 h1:AnAEvhDddvBdpY+uR+MyHmuZzzNqXSe/GvuDeob5L34= +golang.org/x/crypto v0.36.0/go.mod h1:Y4J0ReaxCR1IMaabaSMugxJES1EpwhBHhv2bDHklZvc= golang.org/x/exp v0.0.0-20240909161429-701f63a606c0 h1:e66Fs6Z+fZTbFBAxKfP3PALWBtpfqks2bwGcexMxgtk= golang.org/x/exp v0.0.0-20240909161429-701f63a606c0/go.mod h1:2TbTHSBQa924w8M6Xs1QcRcFwyucIwBGpK1p2f1YFFY= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= @@ -258,13 +261,13 @@ golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I= -golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4= +golang.org/x/net v0.37.0 h1:1zLorHbz+LYj7MQlSf1+2tPIIgibq2eL5xkrGk6f+2c= +golang.org/x/net v0.37.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync 
v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ= -golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.12.0 h1:MHc5BpPuC30uJk597Ri8TV3CNZcTLu6B6z4lJy+g6Jw= +golang.org/x/sync v0.12.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -276,14 +279,14 @@ golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.15.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/sys v0.29.0 h1:TPYlXGxvx1MGTn2GiZDhnjPA9wZzZeGKHHmKhHYvgaU= -golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/term v0.27.0 h1:WP60Sv1nlK1T6SupCHbXzSaN0b9wUmsPoRS9b61A23Q= -golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM= +golang.org/x/sys v0.31.0 h1:ioabZlmFYtWhL+TRYpcnNlLwhyxaM9kWTDEmfnprqik= +golang.org/x/sys v0.31.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.org/x/term v0.30.0 h1:PQ39fJZ+mfadBm0y5WlL4vlM7Sx1Hgf13sMIY2+QS9Y= +golang.org/x/term v0.30.0/go.mod h1:NYYFdzHoI5wRh/h5tDMdMqCqPJZEuNqVR5xJLd/n67g= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo= -golang.org/x/text v0.21.0/go.mod 
h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ= +golang.org/x/text v0.23.0 h1:D71I7dUrlY+VX0gQShAThNGHFxZ13dGLBHQLVl1mJlY= +golang.org/x/text v0.23.0/go.mod h1:/BLNzu4aZCJ1+kcD0DNRotWKage4q2rGVAg4o22unh4= golang.org/x/time v0.0.0-20220210224613-90d013bbcef8 h1:vVKdlvoWBphwdxWKrFZEuM0kGgGLxUOYcY4U/2Vjg44= golang.org/x/time v0.0.0-20220210224613-90d013bbcef8/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= diff --git a/go_labels/main.go b/go_labels/main.go index b574268ef..9fbb9392d 100644 --- a/go_labels/main.go +++ b/go_labels/main.go @@ -25,7 +25,10 @@ func randomString(n int) string { // to work with qemu/bluebox testing harness. A statically linked go test built binary doesn't // work with the go labels extractor ebpf program, not sure yet if this is a bug. func main() { - labels := pprof.Labels("l1", randomString(16), "l2", randomString(16), "l3", randomString(16)) + labels := pprof.Labels( + "l1", "label1"+randomString(16), + "l2", "label2"+randomString(24), + "l3", "label3"+randomString(48)) lastUpdate := time.Now() pprof.Do(context.TODO(), labels, func(context.Context) { //nolint:revive diff --git a/go_labels/main_test.go b/go_labels/main_test.go index c6f0313dd..71c62ab00 100644 --- a/go_labels/main_test.go +++ b/go_labels/main_test.go @@ -6,6 +6,7 @@ import ( "context" "os" "os/exec" + "strings" "testing" "github.com/stretchr/testify/require" @@ -18,33 +19,47 @@ func TestGoCustomLabels(t *testing.T) { t.Skip("root privileges required") } - ctx, cancel := context.WithCancel(context.Background()) - r := &testutils.MockReporter{} enabledTracers, _ := tracertypes.Parse("") enabledTracers.Enable(tracertypes.GoLabels) - traceCh, _ := testutils.StartTracer(ctx, t, enabledTracers, r) - - // Use a separate exe for getting labels as the bpf code doesn't seem to work with - // go test static binaries at the moment, not clear if that's a problem with the 
bpf - // code or a bug/fact of life for static go binaries and getting g from TLS. - cmd := exec.Command("./go_labels_canary.test") - err := cmd.Start() - require.NoError(t, err) + traceCh, _ := testutils.StartTracer(context.Background(), t, enabledTracers, r) + for _, tc := range []string{ + "./go_labels_canary1.23.test", + "./go_labels_canary1.24.test", + } { + t.Run(tc, func(t *testing.T) { + // Use a separate exe for getting labels as the bpf code doesn't seem to work with + // go test static binaries at the moment, not clear if that's a problem with the bpf + // code or a bug/fact of life for static go binaries and getting g from TLS. + cmd := exec.Command(tc) + err := cmd.Start() + require.NoError(t, err) - // Wait 1 second for traces to arrive. - for trace := range traceCh { - if trace == nil { - continue - } - if len(trace.CustomLabels) > 0 { - for k, v := range trace.CustomLabels { - t.Logf("Custom label: %s=%s", k, v) + for trace := range traceCh { + if trace == nil { + continue + } + if len(trace.CustomLabels) > 0 { + for k, v := range trace.CustomLabels { + switch k { + case "l1": + require.Len(t, v, 22) + require.True(t, strings.HasPrefix(v, "label1")) + case "l2": + require.Len(t, v, 30) + require.True(t, strings.HasPrefix(v, "label2")) + case "l3": + require.Len(t, v, 47) + require.True(t, strings.HasPrefix(v, "label3")) + default: + t.Fail() + } + } + break + } } - break - } + _ = cmd.Process.Signal(os.Kill) + _ = cmd.Wait() + }) } - cancel() - _ = cmd.Process.Signal(os.Kill) - _ = cmd.Wait() } diff --git a/host/host.go b/host/host.go index ad3eba64d..6f505058c 100644 --- a/host/host.go +++ b/host/host.go @@ -62,5 +62,6 @@ type Trace struct { APMTraceID libpf.APMTraceID APMTransactionID libpf.APMTransactionID CPU int + EnvVars map[string]string CustomLabels map[string]string } diff --git a/internal/controller/config.go b/internal/controller/config.go index 57f108248..cf457100a 100644 --- a/internal/controller/config.go +++ 
b/internal/controller/config.go @@ -32,15 +32,13 @@ type Config struct { Tracers string VerboseMode bool Version bool - // HostName is the name of the host. - HostName string - // IPAddress is the IP address of the host that sends data to CollAgentAddr. - IPAddress string - OffCPUThreshold uint + OffCPUThreshold uint Reporter reporter.Reporter Fs *flag.FlagSet + + IncludeEnvVars string } const ( diff --git a/internal/controller/controller.go b/internal/controller/controller.go index c696d1e36..7d823352e 100644 --- a/internal/controller/controller.go +++ b/internal/controller/controller.go @@ -3,12 +3,14 @@ package controller // import "go.opentelemetry.io/ebpf-profiler/internal/control import ( "context" "fmt" + "strings" "time" log "github.com/sirupsen/logrus" "github.com/tklauser/numcpus" "go.opentelemetry.io/ebpf-profiler/host" + "go.opentelemetry.io/ebpf-profiler/libpf" "go.opentelemetry.io/ebpf-profiler/metrics" "go.opentelemetry.io/ebpf-profiler/reporter" "go.opentelemetry.io/ebpf-profiler/times" @@ -71,6 +73,15 @@ func (c *Controller) Start(ctx context.Context) error { return fmt.Errorf("failed to start reporter: %w", err) } + envVars := libpf.Set[string]{} + splittedEnvVars := strings.Split(c.config.IncludeEnvVars, ",") + for _, envVar := range splittedEnvVars { + envVar = strings.TrimSpace(envVar) + if envVar != "" { + envVars[envVar] = libpf.Void{} + } + } + // Load the eBPF code and map definitions trc, err := tracer.NewTracer(ctx, &tracer.Config{ Reporter: c.reporter, @@ -85,6 +96,7 @@ func (c *Controller) Start(ctx context.Context) error { ProbabilisticInterval: c.config.ProbabilisticInterval, ProbabilisticThreshold: c.config.ProbabilisticThreshold, OffCPUThreshold: uint32(c.config.OffCPUThreshold), + IncludeEnvVars: envVars, }) if err != nil { return fmt.Errorf("failed to load eBPF tracer: %w", err) diff --git a/interpreter/apmint/apmint.go b/interpreter/apmint/apmint.go index 5ac60895a..bb4eee195 100644 --- a/interpreter/apmint/apmint.go +++ 
b/interpreter/apmint/apmint.go @@ -133,6 +133,9 @@ func (d data) Attach(ebpf interpreter.EbpfHandler, pid libpf.PID, }, nil } +func (d data) Unload(_ interpreter.EbpfHandler) { +} + type Instance struct { serviceName string socket *apmAgentSocket diff --git a/interpreter/customlabels/customlabels.go b/interpreter/customlabels/customlabels.go index 7f9404450..632b6e512 100644 --- a/interpreter/customlabels/customlabels.go +++ b/interpreter/customlabels/customlabels.go @@ -151,6 +151,8 @@ func (d data) Attach(ebpf interpreter.EbpfHandler, pid libpf.PID, return &instance{}, nil } +func (d data) Unload(_ interpreter.EbpfHandler) {} + func (i *instance) Detach(ebpf interpreter.EbpfHandler, pid libpf.PID) error { return ebpf.DeleteProcData(libpf.CustomLabels, pid) } diff --git a/interpreter/dotnet/data.go b/interpreter/dotnet/data.go index 0abb4b509..407a210c0 100644 --- a/interpreter/dotnet/data.go +++ b/interpreter/dotnet/data.go @@ -159,6 +159,9 @@ func (d *dotnetData) Attach(ebpf interpreter.EbpfHandler, pid libpf.PID, bias li }, nil } +func (d *dotnetData) Unload(_ interpreter.EbpfHandler) { +} + func (d *dotnetData) loadIntrospectionData() { vms := &d.vmStructs diff --git a/interpreter/golang/golang.go b/interpreter/golang/golang.go index 5a1bd343f..83db92ed4 100644 --- a/interpreter/golang/golang.go +++ b/interpreter/golang/golang.go @@ -1,7 +1,6 @@ package golang // import "go.opentelemetry.io/ebpf-profiler/interpreter/golang" import ( - "errors" "fmt" "regexp" "unsafe" @@ -50,14 +49,14 @@ func Loader(_ interpreter.EbpfHandler, info *interpreter.LoaderInfo) (interprete if err != nil { return nil, err } - goVersion, err := ReadGoVersion(file) - if errors.Is(err, ErrNoGoVersion) { - log.Debugf("file %s is not a Go binary", info.FileName()) - return nil, nil - } + goVersion, err := file.GoVersion() if err != nil { return nil, err } + if goVersion == "" { + log.Debugf("file %s is not a Go binary", info.FileName()) + return nil, nil + } log.Debugf("file %s detected as 
go version %s", info.FileName(), goVersion) majorMinor := goMajorMinorRegex.FindString(goVersion) if majorMinor == "" { @@ -82,3 +81,5 @@ func Loader(_ interpreter.EbpfHandler, info *interpreter.LoaderInfo) (interprete offsets: offsets, }, nil } + +func (d data) Unload(_ interpreter.EbpfHandler) {} diff --git a/interpreter/golang/readelf.go b/interpreter/golang/readelf.go deleted file mode 100644 index ef4faeccc..000000000 --- a/interpreter/golang/readelf.go +++ /dev/null @@ -1,118 +0,0 @@ -package golang // import "go.opentelemetry.io/ebpf-profiler/interpreter/golang" - -import ( - "bytes" - "encoding/binary" - "errors" - "io" - - "go.opentelemetry.io/ebpf-profiler/libpf/pfelf" -) - -var ErrNoGoVersion = errors.New("go version not found") -var buildInfoMagic = []byte("\xff Go buildinf:") - -// readBuildInfo reads build info, failing if it's not -// in the first 1 MiB of the given stream. -func readBuildInfo(s io.ReaderAt) ([]byte, error) { - const ( - buildInfoAlign = 16 - buildInfoSize = 32 - chunk = 1 << 20 - ) - buf := make([]byte, chunk) - n, err := s.ReadAt(buf, 0) - if err != nil && !errors.Is(err, io.EOF) { - return nil, err - } - data := buf[:n] - for { - i := bytes.Index(data, buildInfoMagic) - if i < 0 || len(data)-i < buildInfoSize { - break - } - if i%buildInfoAlign == 0 && len(data)-i >= buildInfoSize { - data = data[i:] - return data, nil - } - data = data[(i+buildInfoAlign-1)&^(buildInfoAlign-1):] - } - return nil, ErrNoGoVersion -} - -func decodeString(data []byte) string { - u, n := binary.Uvarint(data) - if n <= 0 || u > uint64(len(data)-n) { - return "" - } - return string(data[n : uint64(n)+u]) -} - -// readString returns the string at address addr in the executable x. 
-func readString(x *pfelf.File, ptrSize int, - readPtr func([]byte) uint64, addr uint64) (string, error) { - buf := make([]byte, 2*ptrSize) - n, err := x.ReadAt(buf, int64(addr)) - if err != nil { - return "", err - } - if n != len(buf) { - return "", io.EOF - } - dataAddr := readPtr(buf) - dataLen := readPtr(buf[ptrSize:]) - const maxSize = 64 // implausible that a Go version string is bigger than this - if dataLen > maxSize { - return "", ErrNoGoVersion - } - buf = make([]byte, dataLen) - n, err = x.ReadAt(buf, int64(dataAddr)) - if err != nil { - return "", err - } - if n != len(buf) { - return "", io.EOF - } - return string(buf), nil -} - -// ReadGoVersion returns the version of the Go toolchain that build the binary -// (for example, "go1.19.2"). -// -// It is guaranteed not to consume more than 1 MiB of memory. -func ReadGoVersion(f *pfelf.File) (string, error) { - vs := f.Section(".go.buildinfo") - if vs == nil { - return "", ErrNoGoVersion - } - data, err := readBuildInfo(vs) - if err != nil { - return "", err - } - ptrSize := int(data[14]) - var vers string - if data[15]&2 != 0 { - vers = decodeString(data[32:]) - } else { - bigEndian := data[15] != 0 - var bo binary.ByteOrder - if bigEndian { - bo = binary.BigEndian - } else { - bo = binary.LittleEndian - } - var readPtr func([]byte) uint64 - if ptrSize == 4 { - readPtr = func(b []byte) uint64 { return uint64(bo.Uint32(b)) } - } else if ptrSize == 8 { - readPtr = bo.Uint64 - } else { - return "", ErrNoGoVersion - } - vers, err = readString(f, ptrSize, readPtr, readPtr(data[16:])) - if err != nil { - return "", err - } - } - return vers, nil -} diff --git a/interpreter/hotspot/data.go b/interpreter/hotspot/data.go index 0084a89cc..c49bcb06c 100644 --- a/interpreter/hotspot/data.go +++ b/interpreter/hotspot/data.go @@ -381,6 +381,9 @@ func (d *hotspotData) Attach(_ interpreter.EbpfHandler, _ libpf.PID, bias libpf. 
}, nil } +func (d *hotspotData) Unload(_ interpreter.EbpfHandler) { +} + // locateJvmciVMStructs attempts to heuristically locate the JVMCI VM structs by // searching for references to the string `Klass_vtable_start_offset`. In all JVM // versions >= 9.0, this corresponds to the first entry in the VM structs: diff --git a/interpreter/luajit/luajit.go b/interpreter/luajit/luajit.go index 99a6a40fe..cb8fd60f7 100644 --- a/interpreter/luajit/luajit.go +++ b/interpreter/luajit/luajit.go @@ -113,6 +113,8 @@ func (d *luajitData) Attach(ebpf interpreter.EbpfHandler, pid libpf.PID, _ libpf }, nil } +func (d *luajitData) Unload(_ interpreter.EbpfHandler) {} + func (l *luajitInstance) Detach(ebpf interpreter.EbpfHandler, pid libpf.PID) error { // Clear memory ranges for _, prefixes := range l.prefixes { diff --git a/interpreter/nodev8/v8.go b/interpreter/nodev8/v8.go index 21ad11896..89554f2c7 100644 --- a/interpreter/nodev8/v8.go +++ b/interpreter/nodev8/v8.go @@ -227,8 +227,6 @@ var ( // regex for the interpreter executable or shared library v8Regex = regexp.MustCompile(`^(?:.*/)?node(\d+)?$|^(?:.*/)libnode\.so(\.\d+)?$`) - v8LibRegex = regexp.MustCompile(`^(?:.*/)libnode\.so(\.\d+)?$`) - // The FileID used for V8 stub frames v8StubsFileID = libpf.NewFileID(0x578b, 0x1d) @@ -1879,6 +1877,9 @@ func (d *v8Data) Attach(ebpf interpreter.EbpfHandler, pid libpf.PID, _ libpf.Add }, nil } +func (d *v8Data) Unload(_ interpreter.EbpfHandler) { +} + func (d *v8Data) readIntrospectionData(ef *pfelf.File, syms libpf.SymbolFinder) error { // Read the variables from the pfelf.File so we avoid failures if the process // exists during extraction of the introspection data. 
@@ -2123,9 +2124,7 @@ func (d *v8Data) readIntrospectionData(ef *pfelf.File, syms libpf.SymbolFinder) func Loader(ebpf interpreter.EbpfHandler, info *interpreter.LoaderInfo) (interpreter.Data, error) { if !v8Regex.MatchString(info.FileName()) { - if !v8LibRegex.MatchString(info.FileName()) { - return nil, nil - } + return nil, nil } ef, err := info.GetELF() diff --git a/interpreter/perl/data.go b/interpreter/perl/data.go index d2489678b..6a76081a5 100644 --- a/interpreter/perl/data.go +++ b/interpreter/perl/data.go @@ -162,6 +162,9 @@ func (d *perlData) Attach(_ interpreter.EbpfHandler, _ libpf.PID, bias libpf.Add }, nil } +func (d *perlData) Unload(_ interpreter.EbpfHandler) { +} + func newData(ebpf interpreter.EbpfHandler, info *interpreter.LoaderInfo, ef *pfelf.File) (*perlData, error) { // The version is encoded in these globals since Perl 5.15.0. diff --git a/interpreter/php/opcache.go b/interpreter/php/opcache.go index 8bf435e21..5e5107643 100644 --- a/interpreter/php/opcache.go +++ b/interpreter/php/opcache.go @@ -246,6 +246,9 @@ func (d *opcacheData) Attach(_ interpreter.EbpfHandler, _ libpf.PID, bias libpf. }, nil } +func (d *opcacheData) Unload(_ interpreter.EbpfHandler) { +} + func determineOPCacheVersion(ef *pfelf.File) (uint, error) { // In contrast to interpreterphp, the opcache actually contains // a really straightforward way to recover the version. 
As the opcache diff --git a/interpreter/php/php.go b/interpreter/php/php.go index 95313f863..5b6206adb 100644 --- a/interpreter/php/php.go +++ b/interpreter/php/php.go @@ -146,6 +146,9 @@ func (d *phpData) Attach(ebpf interpreter.EbpfHandler, pid libpf.PID, bias libpf return instance, nil } +func (d *phpData) Unload(_ interpreter.EbpfHandler) { +} + func versionExtract(rodata string) (uint, error) { matches := versionMatch.FindStringSubmatch(rodata) if matches == nil { diff --git a/interpreter/python/python.go b/interpreter/python/python.go index d3211c15e..82f4dfebe 100644 --- a/interpreter/python/python.go +++ b/interpreter/python/python.go @@ -148,6 +148,9 @@ func (d *pythonData) Attach(_ interpreter.EbpfHandler, _ libpf.PID, bias libpf.A return i, nil } +func (d *pythonData) Unload(_ interpreter.EbpfHandler) { +} + // pythonCodeObject contains the information we cache for a corresponding // Python interpreter's PyCodeObject structures. type pythonCodeObject struct { diff --git a/interpreter/ruby/ruby.go b/interpreter/ruby/ruby.go index 24fe7fc3e..66f04b201 100644 --- a/interpreter/ruby/ruby.go +++ b/interpreter/ruby/ruby.go @@ -232,6 +232,9 @@ func (r *rubyData) Attach(ebpf interpreter.EbpfHandler, pid libpf.PID, bias libp }, nil } +func (r *rubyData) Unload(_ interpreter.EbpfHandler) { +} + // rubyIseqBodyPC holds a reported address to a iseq_constant_body and Ruby VM program counter // combination and is used as key in the cache. type rubyIseqBodyPC struct { diff --git a/interpreter/types.go b/interpreter/types.go index 325d80c4e..2ee082882 100644 --- a/interpreter/types.go +++ b/interpreter/types.go @@ -125,6 +125,9 @@ type Data interface { // of it to the ebpf maps. Attach(ebpf EbpfHandler, pid libpf.PID, bias libpf.Address, rm remotememory.RemoteMemory) ( Instance, error) + + // Unload can undo any allocations or eBPF entries the Loader function created + Unload(ebpf EbpfHandler) } // Instance is the interface to operate on per-PID data. 
diff --git a/libpf/convenience.go b/libpf/convenience.go index 1abdb6043..e936f64b2 100644 --- a/libpf/convenience.go +++ b/libpf/convenience.go @@ -4,7 +4,6 @@ package libpf // import "go.opentelemetry.io/ebpf-profiler/libpf" import ( - "context" "math/rand/v2" "reflect" "time" @@ -13,24 +12,6 @@ import ( log "github.com/sirupsen/logrus" ) -// SleepWithJitter sleeps for baseDuration +/- jitter (jitter is [0..1]) -func SleepWithJitter(baseDuration time.Duration, jitter float64) { - time.Sleep(AddJitter(baseDuration, jitter)) -} - -// SleepWithJitterAndContext blocks for duration +/- jitter (jitter is [0..1]) or until ctx -// is canceled. -func SleepWithJitterAndContext(ctx context.Context, duration time.Duration, jitter float64) error { - tick := time.NewTicker(AddJitter(duration, jitter)) - defer tick.Stop() - select { - case <-ctx.Done(): - return ctx.Err() - case <-tick.C: - return nil - } -} - // AddJitter adds +/- jitter (jitter is [0..1]) to baseDuration func AddJitter(baseDuration time.Duration, jitter float64) time.Duration { if jitter < 0.0 || jitter > 1.0 { diff --git a/libpf/convenience_test.go b/libpf/convenience_test.go index 1fd7a74f7..cf187a8e6 100644 --- a/libpf/convenience_test.go +++ b/libpf/convenience_test.go @@ -11,42 +11,6 @@ import ( "github.com/stretchr/testify/assert" ) -func TestHexTo(t *testing.T) { - tests := map[string]struct { - result uint64 - }{ - "0": {result: 0}, - "FFFFFF": {result: 16777215}, - "42": {result: 66}, - } - - for name, testcase := range tests { - name := name - testcase := testcase - t.Run(name, func(t *testing.T) { - assert.Equal(t, testcase.result, util.HexToUint64(name)) - }) - } -} - -func TestDecTo(t *testing.T) { - tests := map[string]struct { - result uint64 - }{ - "0": {result: 0}, - "123": {result: 123}, - "42": {result: 42}, - } - - for name, testcase := range tests { - name := name - testcase := testcase - t.Run(name, func(t *testing.T) { - assert.Equal(t, testcase.result, util.DecToUint64(name)) - }) - } 
-} - func TestIsValidString(t *testing.T) { tests := map[string]struct { input []byte diff --git a/libpf/generics.go b/libpf/generics.go index 6a57009d4..5ba4b52fe 100644 --- a/libpf/generics.go +++ b/libpf/generics.go @@ -24,24 +24,6 @@ func MapKeysToSlice[K comparable, V any](m map[K]V) []K { return slice } -// MapValuesToSlice creates a slice from a map's values. -func MapValuesToSlice[K comparable, V any](m map[K]V) []V { - slice := make([]V, 0, len(m)) - for _, value := range m { - slice = append(slice, value) - } - return slice -} - -// SliceToSet creates a set from a slice, deduplicating it. -func SliceToSet[T comparable](s []T) Set[T] { - set := make(map[T]Void, len(s)) - for _, item := range s { - set[item] = Void{} - } - return set -} - // SliceAllEqual checks whether all items in a slice have a given value. func SliceAllEqual[T comparable](s []T, value T) bool { for _, item := range s { @@ -52,25 +34,3 @@ func SliceAllEqual[T comparable](s []T, value T) bool { return true } - -// SlicesEqual checks whether two slices are element-wise equal. -func SlicesEqual[T comparable](a, b []T) bool { - if len(a) != len(b) { - return false - } - for i := 0; i < len(a); i++ { - if a[i] != b[i] { - return false - } - } - return true -} - -// MapSlice returns a new slice by mapping given function over the input slice. 
-func MapSlice[T, V any](in []T, mapf func(T) V) []V { - ret := make([]V, len(in)) - for idx := range in { - ret[idx] = mapf(in[idx]) - } - return ret -} diff --git a/libpf/pfelf/file.go b/libpf/pfelf/file.go index 93010b6a8..6c03703af 100644 --- a/libpf/pfelf/file.go +++ b/libpf/pfelf/file.go @@ -21,6 +21,7 @@ package pfelf // import "go.opentelemetry.io/ebpf-profiler/libpf/pfelf" import ( "bytes" + "debug/buildinfo" "debug/elf" "errors" "fmt" @@ -28,6 +29,7 @@ import ( "io" "os" "path/filepath" + "runtime/debug" "sort" "syscall" "unsafe" @@ -125,6 +127,9 @@ type File struct { debuglinkPath string // Whether we have checked for a debuglink debuglinkChecked bool + + // Contains the Go build information if present + goBuildInfo *debug.BuildInfo } var _ libpf.SymbolFinder = &File{} @@ -521,6 +526,26 @@ func (f *File) GetBuildID() (string, error) { return getBuildIDFromNotes(data) } +// GoVersion returns the Go version if present and empty string otherwise. This will delegate +// to buildinfo.Read for any binaries where IsGolang is true which will scan the binary with +// debug/elf. This will incur additional CPU/IO overhead but the libpf.readbufat buffer and +// OS file buffers should ameliorate most of that. 
+func (f *File) GoVersion() (string, error) { + if f.goBuildInfo != nil { + return f.goBuildInfo.GoVersion, nil + } + if !f.IsGolang() { + return "", nil + } + bi, err := buildinfo.Read(f.elfReader) + if err != nil { + return "", err + } + f.goBuildInfo = bi + + return bi.GoVersion, nil +} + // DebuglinkFileName returns the debug file linked by .gnu_debuglink if any func (f *File) DebuglinkFileName(elfFilePath string, elfOpener ELFOpener) string { if f.debuglinkChecked { diff --git a/libpf/pfelf/file_test.go b/libpf/pfelf/file_test.go index 5da840197..9e8f4a95f 100644 --- a/libpf/pfelf/file_test.go +++ b/libpf/pfelf/file_test.go @@ -4,7 +4,9 @@ package pfelf import ( + "go/version" "os" + "runtime" "testing" "github.com/stretchr/testify/assert" @@ -78,3 +80,18 @@ func TestPFELFIsGolang(t *testing.T) { testPFELFIsGolang(t, "testdata/go-binary", true) testPFELFIsGolang(t, "testdata/without-debug-syms", false) } + +func TestGoVersion(t *testing.T) { + ef := getPFELF("testdata/go-binary", t) + defer ef.Close() + + vers, err := ef.GoVersion() + require.NoError(t, err) + assert.GreaterOrEqual(t, version.Compare(vers, "go1.23.6"), 0) + + testEF := getPFELF("/proc/self/exe", t) + defer testEF.Close() + testVersion, err := testEF.GoVersion() + require.NoError(t, err) + assert.Equal(t, runtime.Version(), testVersion) +} diff --git a/libpf/pfelf/testdata/Makefile b/libpf/pfelf/testdata/Makefile index 57231c121..757c2801a 100644 --- a/libpf/pfelf/testdata/Makefile +++ b/libpf/pfelf/testdata/Makefile @@ -44,7 +44,6 @@ kernel-image: test.c ubuntu-kernel-image: test.c $(CC) $< -s -o $@ -DLINUX_VERSION="\"Linux version 1.2.3 (Ubuntu 4.5.6)\\n\"" -# A fake go binary (with a .gopclntab section) -go-binary: without-debug-syms - $(OBJCOPY) --add-section .gopclntab=/dev/null $< $@ +go-binary: gotest.go + go build -o go-binary -ldflags "-w -s" gotest.go diff --git a/libpf/pfelf/testdata/gotest.go b/libpf/pfelf/testdata/gotest.go new file mode 100644 index 000000000..da29a2cad --- 
/dev/null +++ b/libpf/pfelf/testdata/gotest.go @@ -0,0 +1,4 @@ +package main + +func main() { +} diff --git a/main.go b/main.go index 17debe3b8..ddc387858 100644 --- a/main.go +++ b/main.go @@ -99,8 +99,8 @@ func mainWithExitCode() exitCode { }() } - intervals := times.New(cfg.MonitorInterval, - cfg.ReporterInterval, cfg.ProbabilisticInterval) + intervals := times.New(cfg.ReporterInterval, + cfg.MonitorInterval, cfg.ProbabilisticInterval) kernelVersion, err := helpers.GetKernelVersion() if err != nil { @@ -114,7 +114,6 @@ func mainWithExitCode() exitCode { log.Error(err) return exitFailure } - cfg.HostName, cfg.IPAddress = hostname, sourceIP rep, err := reporter.NewOTLP(&reporter.Config{ CollAgentAddr: cfg.CollAgentAddr, diff --git a/metrics/ids.go b/metrics/ids.go index 8fc74b3b6..086a9558b 100644 --- a/metrics/ids.go +++ b/metrics/ids.go @@ -269,12 +269,6 @@ const ( // Number of failures to unwind because return address was not found with heuristic IDUnwindHotspotErrInvalidRA = 130 - // Number of cache hits in tracehandler trace cache by BPF hash - IDKnownTracesHit = 131 - - // Number of cache misses in tracehandler trace cache by BPF hash - IDKnownTracesMiss = 132 - // Current size of the unwind info array IDUnwindInfoArraySize = 133 @@ -638,39 +632,42 @@ const ( // Number of times a trace event read failed (trace_events) IDTraceEventReadError = 274 + // Number of parsing errors seen during processing /proc//maps + IDErrProcParse = 275 + // Number of attempts to read Go custom labels - IDUnwindGoCustomLabelsAttempts = 275 + IDUnwindGoCustomLabelsAttempts = 276 // Number of failures reading Go custom labels - IDUnwindGoCustomLabelsFailures = 276 + IDUnwindGoCustomLabelsFailures = 277 // Number of failures to get TSD base for native custom labels - IDUnwindNativeCustomLabelsErrReadTsdBase = 277 + IDUnwindNativeCustomLabelsErrReadTsdBase = 278 // Number of failures to read native custom labels thread-local object - IDUnwindNativeCustomLabelsErrReadData = 278 + 
IDUnwindNativeCustomLabelsErrReadData = 279 // Number of failures to read native custom labels key buffer - IDUnwindNativeCustomLabelsErrReadKey = 279 + IDUnwindNativeCustomLabelsErrReadKey = 280 // Number of failures to read native custom labels value buffer - IDUnwindNativeCustomLabelsErrReadValue = 280 + IDUnwindNativeCustomLabelsErrReadValue = 281 // Number of successful reads of native custom labels - IDUnwindNativeCustomLabelsReadSuccesses = 281 + IDUnwindNativeCustomLabelsReadSuccesses = 282 // Total number of failures to add native custom labels - IDUnwindNativeCustomLabelsAddErrors = 282 + IDUnwindNativeCustomLabelsAddErrors = 283 // Number of successes adding native custom labels - IDUnwindNativeCustomLabelsAddSuccesses = 283 + IDUnwindNativeCustomLabelsAddSuccesses = 284 // Number of attempted LuaJIT unwinds - IDUnwindLuaJITAttempts = 284 + IDUnwindLuaJITAttempts = 285 // Number of times we didn't find an entry for this process in the LuaJIT process info array - IDUnwindLuaJITErrNoProcInfo = 285 + IDUnwindLuaJITErrNoProcInfo = 286 // max number of ID values, keep this as *last entry* - IDMax = 286 + IDMax = 287 ) diff --git a/metrics/metrics.json b/metrics/metrics.json index 64508ab21..11a76a554 100644 --- a/metrics/metrics.json +++ b/metrics/metrics.json @@ -952,6 +952,7 @@ "id": 130 }, { + "obsolete": true, "description": "Number of cache hits in tracehandler trace cache by BPF hash", "type": "counter", "name": "KnownTracesHit", @@ -959,6 +960,7 @@ "id": 131 }, { + "obsolete": true, "description": "Number of cache misses in tracehandler trace cache by BPF hash", "type": "counter", "name": "KnownTracesMiss", @@ -1979,81 +1981,88 @@ "field": "agent.errors.trace_event_read_error", "id": 274 }, + { + "description": "Number of parsing errors seen during processing /proc//maps", + "type": "counter", + "name": "ErrProcParse", + "field": "agent.errors.proc_parse", + "id": 275 + }, { "description": "Number of attempts to read Go custom labels", "type": 
"counter", "name": "UnwindGoCustomLabelsAttempts", "field": "bpf.golabels.attempts", - "id": 275 + "id": 276 }, { "description": "Number of failures reading Go custom labels", "type": "counter", "name": "UnwindGoCustomLabelsFailures", "field": "bpf.golabels.errors.no_proc_info", - "id": 276 + "id": 277 }, { "description": "Number of failures to get TSD base for native custom labels", "type": "counter", "name": "UnwindNativeCustomLabelsErrReadTsdBase", "field": "bpf.nativelabels.errors.read_tsd_base", - "id": 277 + "id": 278 }, { "description": "Number of failures to read native custom labels thread-local object", "type": "counter", "name": "UnwindNativeCustomLabelsErrReadData", "field": "bpf.nativelabels.errors.read_data", - "id": 278 + "id": 279 }, { "description": "Number of failures to read native custom labels key buffer", "type": "counter", "name": "UnwindNativeCustomLabelsErrReadKey", "field": "bpf.nativelabels.errors.read_key", - "id": 279 + "id": 280 }, { "description": "Number of failures to read native custom labels value buffer", "type": "counter", "name": "UnwindNativeCustomLabelsErrReadValue", "field": "bpf.nativelabels.errors.read_value", - "id": 280 + "id": 281 }, { "description": "Number of successful reads of native custom labels", "type": "counter", "name": "UnwindNativeCustomLabelsReadSuccesses", "field": "bpf.nativelabels.read.successes", - "id": 281 + "id": 282 }, { "description": "Total number of failures to add native custom labels", "type": "counter", "name": "UnwindNativeCustomLabelsAddErrors", "field": "bpf.nativelabels.add.errors", - "id": 282 + "id": 283 }, { "description": "Number of successes adding native custom labels", "type": "counter", "name": "UnwindNativeCustomLabelsAddSuccesses", "field": "bpf.nativelabels.add.successes", - "id": 283 + "id": 284 }, { "description": "Number of attempted LuaJIT unwinds", "type": "counter", "name": "UnwindLuaJITAttempts", "field": "bpf.luajit.attempts", - "id": 284 + "id": 285 }, { "description": 
"Number of times we didn't find an entry for this process in the LuaJIT process info array", "type": "counter", "name": "UnwindLuaJITErrNoProcInfo", "field": "bpf.luajit.errors.no_proc_info", - "id": 285 + "id": 286 } ] diff --git a/process/coredump.go b/process/coredump.go index c20d96af6..d868f5c82 100644 --- a/process/coredump.go +++ b/process/coredump.go @@ -258,8 +258,8 @@ func (cd *CoredumpProcess) GetMachineData() MachineData { } // GetMappings implements the Process interface -func (cd *CoredumpProcess) GetMappings() ([]Mapping, error) { - return cd.mappings, nil +func (cd *CoredumpProcess) GetMappings() ([]Mapping, uint32, error) { + return cd.mappings, 0, nil } // GetThreadInfo implements the Process interface diff --git a/process/process.go b/process/process.go index 760462bbb..04c22ed6a 100644 --- a/process/process.go +++ b/process/process.go @@ -11,16 +11,17 @@ import ( "fmt" "io" "os" + "strconv" "strings" "sync" + log "github.com/sirupsen/logrus" "golang.org/x/sys/unix" "go.opentelemetry.io/ebpf-profiler/libpf" "go.opentelemetry.io/ebpf-profiler/libpf/pfelf" "go.opentelemetry.io/ebpf-profiler/remotememory" "go.opentelemetry.io/ebpf-profiler/stringutil" - "go.opentelemetry.io/ebpf-profiler/util" ) // systemProcess provides an implementation of the Process interface for a @@ -79,12 +80,13 @@ func trimMappingPath(path string) string { return path } -func parseMappings(mapsFile io.Reader) ([]Mapping, error) { +func parseMappings(mapsFile io.Reader) ([]Mapping, uint32, error) { + numParseErrors := uint32(0) mappings := make([]Mapping, 0, 32) scanner := bufio.NewScanner(mapsFile) scanBuf := bufPool.Get().(*[]byte) if scanBuf == nil { - return mappings, errors.New("failed to get memory from sync pool") + return mappings, 0, errors.New("failed to get memory from sync pool") } defer func() { // Reset memory and return it for reuse. 
@@ -93,6 +95,8 @@ func parseMappings(mapsFile io.Reader) ([]Mapping, error) { } bufPool.Put(scanBuf) }() + + lastPath := "" scanner.Buffer(*scanBuf, 8192) for scanner.Scan() { var fields [6]string @@ -101,14 +105,17 @@ func parseMappings(mapsFile io.Reader) ([]Mapping, error) { line := stringutil.ByteSlice2String(scanner.Bytes()) if stringutil.FieldsN(line, fields[:]) < 5 { + numParseErrors++ continue } if stringutil.SplitN(fields[0], "-", addrs[:]) < 2 { + numParseErrors++ continue } mapsFlags := fields[1] if len(mapsFlags) < 3 { + numParseErrors++ continue } flags := elf.ProgFlag(0) @@ -126,12 +133,31 @@ func parseMappings(mapsFile io.Reader) ([]Mapping, error) { if flags&(elf.PF_R|elf.PF_X) == 0 { continue } - inode := util.DecToUint64(fields[4]) + inode, err := strconv.ParseUint(fields[4], 10, 64) + if err != nil { + log.Debugf("inode: failed to convert %s to uint64: %v", fields[4], err) + numParseErrors++ + continue + } + path := fields[5] if stringutil.SplitN(fields[3], ":", devs[:]) < 2 { + numParseErrors++ + continue + } + major, err := strconv.ParseUint(devs[0], 16, 64) + if err != nil { + log.Debugf("major device: failed to convert %s to uint64: %v", devs[0], err) + numParseErrors++ + continue + } + minor, err := strconv.ParseUint(devs[1], 16, 64) + if err != nil { + log.Debugf("minor device: failed to convert %s to uint64: %v", devs[1], err) + numParseErrors++ continue } - device := util.HexToUint64(devs[0])<<8 + util.HexToUint64(devs[1]) + device := major<<8 + minor if inode == 0 { if path == "[vdso]" { @@ -146,21 +172,48 @@ func parseMappings(mapsFile io.Reader) ([]Mapping, error) { } } else { path = trimMappingPath(path) - path = strings.Clone(path) + if path == lastPath { + // Take advantage of the fact that mappings are sorted by path + // and avoid allocating the same string multiple times. 
+ path = lastPath + } else { + path = strings.Clone(path) + lastPath = path + } + } + + vaddr, err := strconv.ParseUint(addrs[0], 16, 64) + if err != nil { + log.Debugf("vaddr: failed to convert %s to uint64: %v", addrs[0], err) + numParseErrors++ + continue + } + vend, err := strconv.ParseUint(addrs[1], 16, 64) + if err != nil { + log.Debugf("vend: failed to convert %s to uint64: %v", addrs[1], err) + numParseErrors++ + continue + } + length := vend - vaddr + + fileOffset, err := strconv.ParseUint(fields[2], 16, 64) + if err != nil { + log.Debugf("fileOffset: failed to convert %s to uint64: %v", fields[2], err) + numParseErrors++ + continue } - vaddr := util.HexToUint64(addrs[0]) mappings = append(mappings, Mapping{ Vaddr: vaddr, - Length: util.HexToUint64(addrs[1]) - vaddr, + Length: length, Flags: flags, - FileOffset: util.HexToUint64(fields[2]), + FileOffset: fileOffset, Device: device, Inode: inode, Path: path, }) } - return mappings, scanner.Err() + return mappings, numParseErrors, scanner.Err() } // GetMappings will process the mappings file from proc. Additionally, @@ -168,14 +221,14 @@ func parseMappings(mapsFile io.Reader) ([]Mapping, error) { // OpenELF opening ELF files using the corresponding proc map_files entry. // WARNING: This implementation does not support calling GetMappings // concurrently with itself, or with OpenELF. 
-func (sp *systemProcess) GetMappings() ([]Mapping, error) { +func (sp *systemProcess) GetMappings() ([]Mapping, uint32, error) { mapsFile, err := os.Open(fmt.Sprintf("/proc/%d/maps", sp.pid)) if err != nil { - return nil, err + return nil, 0, err } defer mapsFile.Close() - mappings, err := parseMappings(mapsFile) + mappings, numParseErrors, err := parseMappings(mapsFile) if err == nil { fileToMapping := make(map[string]*Mapping, len(mappings)) for idx := range mappings { @@ -189,7 +242,7 @@ func (sp *systemProcess) GetMappings() ([]Mapping, error) { } sp.fileToMapping = fileToMapping } - return mappings, err + return mappings, numParseErrors, err } func (sp *systemProcess) GetThreads() ([]ThreadInfo, error) { diff --git a/process/process_test.go b/process/process_test.go index 3cd2b7d1a..9e5689db7 100644 --- a/process/process_test.go +++ b/process/process_test.go @@ -22,11 +22,16 @@ var testMappings = `55fe82710000-55fe8273c000 r--p 00000000 fd:01 1068432 55fe82836000-55fe8283d000 r--p 00125000 fd:01 1068432 /tmp/usr_bin_seahorse 55fe8283d000-55fe8283e000 rw-p 0012c000 fd:01 1068432 /tmp/usr_bin_seahorse 7f63c8c3e000-7f63c8de0000 r-xp 00085000 08:01 1048922 /tmp/usr_lib_x86_64-linux-gnu_libcrypto.so.1.1 -7f63c8ebf000-7f63c8fef000 r-xp 0001c000 1fd:01 1075944 /tmp/usr_lib_x86_64-linux-gnu_libopensc.so.6.0.0` +7f63c8ebf000-7f63c8fef000 r-xp 0001c000 1fd:01 1075944 /tmp/usr_lib_x86_64-linux-gnu_libopensc.so.6.0.0 +7f63c8eef000-7f63c8fdf000 r-xp 0001c000 1fd:01 +7f63c8eef000-7f63c8fdf000 r-xp 0001c000 1fd.01 1075944 +7f63c8eef000-7f63c8fdf000 r- 0001c000 1fd:01 1075944 +7f63c8eef000 r-xp 0001c000 1fd:01 1075944` func TestParseMappings(t *testing.T) { - mappings, err := parseMappings(strings.NewReader(testMappings)) + mappings, numParseErrors, err := parseMappings(strings.NewReader(testMappings)) require.NoError(t, err) + require.Equal(t, uint32(4), numParseErrors) assert.NotNil(t, mappings) expected := []Mapping{ @@ -101,7 +106,8 @@ func TestNewPIDOfSelf(t *testing.T) 
{ pr := New(libpf.PID(os.Getpid())) assert.NotNil(t, pr) - mappings, err := pr.GetMappings() + mappings, numParseErrors, err := pr.GetMappings() require.NoError(t, err) + require.Equal(t, uint32(0), numParseErrors) assert.NotEmpty(t, mappings) } diff --git a/process/types.go b/process/types.go index 55460b2e3..ffe9e59c3 100644 --- a/process/types.go +++ b/process/types.go @@ -101,7 +101,7 @@ type Process interface { GetMachineData() MachineData // GetMappings reads and parses process memory mappings - GetMappings() ([]Mapping, error) + GetMappings() ([]Mapping, uint32, error) // GetThreads reads the process thread states GetThreads() ([]ThreadInfo, error) diff --git a/processmanager/ebpf/ebpf.go b/processmanager/ebpf/ebpf.go index 1e81aa263..45c824007 100644 --- a/processmanager/ebpf/ebpf.go +++ b/processmanager/ebpf/ebpf.go @@ -780,7 +780,14 @@ func (impl *ebpfMapsImpl) UpdatePidPageMappingInfo(pid libpf.PID, prefix lpm.Pre func (impl *ebpfMapsImpl) DeletePidPageMappingInfo(pid libpf.PID, prefixes []lpm.Prefix) (int, error) { if impl.hasLPMTrieBatchOperations { - return impl.DeletePidPageMappingInfoBatch(pid, prefixes) + deleted, err := impl.DeletePidPageMappingInfoBatch(pid, prefixes) + if err != nil { + // BatchDelete may return early and not run to completion. If that happens, + // fall back to a single Delete pass to avoid leaking map entries. 
+ deleted2, _ := impl.DeletePidPageMappingInfoSingle(pid, prefixes) + return (deleted + deleted2), err + } + return deleted, nil } return impl.DeletePidPageMappingInfoSingle(pid, prefixes) } diff --git a/processmanager/execinfomanager/manager.go b/processmanager/execinfomanager/manager.go index 368a8b345..a012feaf5 100644 --- a/processmanager/execinfomanager/manager.go +++ b/processmanager/execinfomanager/manager.go @@ -279,6 +279,9 @@ func (mgr *ExecutableInfoManager) RemoveOrDecRef(fileID host.FileID) error { if err := state.unloadDeltas(fileID, &info.mapRef); err != nil { return fmt.Errorf("failed remove fileID 0x%x from BPF maps: %w", fileID, err) } + if info.Data != nil { + info.Data.Unload(state.ebpf) + } delete(state.executables, fileID) case 0: // This should be unreachable. diff --git a/processmanager/manager.go b/processmanager/manager.go index 1327feb5b..53511c61a 100644 --- a/processmanager/manager.go +++ b/processmanager/manager.go @@ -67,7 +67,7 @@ var ( func New(ctx context.Context, includeTracers types.IncludedTracers, monitorInterval time.Duration, ebpf pmebpf.EbpfHandler, fileIDMapper FileIDMapper, symbolReporter reporter.SymbolReporter, sdp nativeunwind.StackDeltaProvider, filterErrorFrames bool, - collectCustomLabels bool) (*ProcessManager, error) { + collectCustomLabels bool, includeEnvVars libpf.Set[string]) (*ProcessManager, error) { if fileIDMapper == nil { var err error fileIDMapper, err = newFileIDMapper(lruFileIDCacheSize) @@ -102,6 +102,7 @@ func New(ctx context.Context, includeTracers types.IncludedTracers, monitorInter reporter: symbolReporter, metricsAddSlice: metrics.AddSlice, filterErrorFrames: filterErrorFrames, + includeEnvVars: includeEnvVars, } collectInterpreterMetrics(ctx, pm, monitorInterval) @@ -173,6 +174,8 @@ func collectInterpreterMetrics(ctx context.Context, pm *ProcessManager, metrics.MetricValue(pm.mappingStats.maxProcParseUsec.Swap(0)) summary[metrics.IDTotalProcParseUsec] = 
metrics.MetricValue(pm.mappingStats.totalProcParseUsec.Swap(0)) + summary[metrics.IDErrProcParse] = + metrics.MetricValue(pm.mappingStats.numProcParseErrors.Swap(0)) mapsMetrics := pm.ebpf.CollectMetrics() for _, metric := range mapsMetrics { diff --git a/processmanager/manager_test.go b/processmanager/manager_test.go index 93d88b106..a5d781136 100644 --- a/processmanager/manager_test.go +++ b/processmanager/manager_test.go @@ -48,8 +48,8 @@ func (d *dummyProcess) GetMachineData() process.MachineData { return process.MachineData{} } -func (d *dummyProcess) GetMappings() ([]process.Mapping, error) { - return nil, errors.New("not implemented") +func (d *dummyProcess) GetMappings() ([]process.Mapping, uint32, error) { + return nil, 0, errors.New("not implemented") } func (d *dummyProcess) GetThreads() ([]process.ThreadInfo, error) { @@ -323,7 +323,8 @@ func TestInterpreterConvertTrace(t *testing.T) { &symbolReporterMockup{}, nil, true, - false) + false, + libpf.Set[string]{}) require.NoError(t, err) newTrace, err := manager.ConvertTrace(testcase.trace) @@ -409,7 +410,9 @@ func TestNewMapping(t *testing.T) { NewMapFileIDMapper(), symRepMockup, &dummyProvider, - true, false) + true, + false, + libpf.Set[string]{}) require.NoError(t, err) // Replace the internal hooks for the tests. 
These hooks catch the @@ -594,7 +597,9 @@ func TestProcExit(t *testing.T) { NewMapFileIDMapper(), repMockup, &dummyProvider, - true, false) + true, + false, + libpf.Set[string]{}) require.NoError(t, err) defer cancel() diff --git a/processmanager/processinfo.go b/processmanager/processinfo.go index d0e7cf4c6..fa8dcf169 100644 --- a/processmanager/processinfo.go +++ b/processmanager/processinfo.go @@ -16,6 +16,7 @@ import ( "fmt" "os" "path" + "strings" "syscall" "time" @@ -88,8 +89,33 @@ func (pm *ProcessManager) updatePidInformation(pid libpf.PID, m *Mapping) (bool, if name, err := os.ReadFile(fmt.Sprintf("/proc/%d/comm", pid)); err == nil { processName = string(name) } + + envVarMap := make(map[string]string, len(pm.includeEnvVars)) + if len(pm.includeEnvVars) > 0 { + if envVars, err := os.ReadFile(fmt.Sprintf("/proc/%d/environ", pid)); err == nil { + // environ has environment variables separated by a null byte (hex: 00) + splittedVars := strings.Split(string(envVars), "\000") + for _, envVar := range splittedVars { + keyValuePair := strings.SplitN(envVar, "=", 2) + + // If the entry could not be split at a '=', ignore it + // (last entry of environ might be empty) + if len(keyValuePair) != 2 { + continue + } + + if _, ok := pm.includeEnvVars[keyValuePair[0]]; ok { + envVarMap[keyValuePair[0]] = keyValuePair[1] + } + } + } + } + info = &processInfo{ - meta: ProcessMeta{Name: processName, Executable: exePath}, + meta: ProcessMeta{ + Name: processName, + Executable: exePath, + EnvVariables: envVarMap}, mappings: make(map[libpf.Address]*Mapping), mappingsByFileID: make(map[host.FileID]map[libpf.Address]*Mapping), tsdInfo: nil, @@ -561,10 +587,22 @@ func (pm *ProcessManager) SynchronizeProcess(pr process.Process) { pid := pr.PID() log.Debugf("= PID: %v", pid) + // Abort early if process is waiting for cleanup in ProcessedUntil + pm.mu.Lock() + _, ok := pm.exitEvents[pid] + pm.mu.Unlock() + + if ok { + log.Debugf("PID %v waiting for cleanup, aborting 
SynchronizeProcess", pid) + pm.ebpf.RemoveReportedPID(pid) + return + } + pm.mappingStats.numProcAttempts.Add(1) start := time.Now() - mappings, err := pr.GetMappings() + mappings, numParseErrors, err := pr.GetMappings() elapsed := time.Since(start) + pm.mappingStats.numProcParseErrors.Add(numParseErrors) if err != nil { if os.IsPermission(err) { @@ -585,7 +623,7 @@ func (pm *ProcessManager) SynchronizeProcess(pr process.Process) { pm.mappingStats.errProcNotExist.Add(1) } else if e, ok := err.(*os.PathError); ok && e.Err == syscall.ESRCH { // If the process exits while reading its /proc/$PID/maps, the kernel will - // return ESRCH. Handle it as if the process did not exists. + // return ESRCH. Handle it as if the process did not exist. pm.mappingStats.errProcESRCH.Add(1) } return diff --git a/processmanager/types.go b/processmanager/types.go index c70ec5729..b088752bf 100644 --- a/processmanager/types.go +++ b/processmanager/types.go @@ -78,6 +78,7 @@ type ProcessManager struct { numProcAttempts atomic.Uint32 maxProcParseUsec atomic.Uint32 totalProcParseUsec atomic.Uint32 + numProcParseErrors atomic.Uint32 } // elfInfoCache provides a cache to quickly retrieve the ELF info and fileID for a particular @@ -96,6 +97,9 @@ type ProcessManager struct { // filterErrorFrames determines whether error frames are dropped by `ConvertTrace`. filterErrorFrames bool + + // includeEnvVars holds a list of env vars that should be captured from processes + includeEnvVars libpf.Set[string] } // Mapping represents an executable memory mapping of a process. 
@@ -141,6 +145,8 @@ type ProcessMeta struct { Name string // executable path retrieved from /proc/PID/exe Executable string + // process env vars from /proc/PID/environ + EnvVariables map[string]string } // processInfo contains information about the executable mappings diff --git a/reporter/base_reporter.go b/reporter/base_reporter.go index 66f5db90d..f618e0023 100644 --- a/reporter/base_reporter.go +++ b/reporter/base_reporter.go @@ -5,6 +5,8 @@ package reporter // import "go.opentelemetry.io/ebpf-profiler/reporter" import ( "context" + "errors" + "fmt" "time" lru "github.com/elastic/go-freelru" @@ -42,6 +44,8 @@ type baseReporter struct { hostmetadata *lru.SyncedLRU[string, string] } +var errUnknownOrigin = errors.New("unknown trace origin") + func (b *baseReporter) Stop() { b.runLoop.Stop() } @@ -63,13 +67,6 @@ func (b *baseReporter) addHostmetadata(metadataMap map[string]string) { } } -// ReportFramesForTrace is a NOP -func (*baseReporter) ReportFramesForTrace(_ *libpf.Trace) {} - -// ReportCountForTrace is a NOP -func (b *baseReporter) ReportCountForTrace(_ libpf.TraceHash, _ uint16, _ *samples.TraceEventMeta) { -} - func (b *baseReporter) ExecutableKnown(fileID libpf.FileID) bool { _, known := b.pdata.Executables.GetAndRefresh(fileID, pdata.ExecutableCacheLifetime) return known @@ -93,13 +90,11 @@ func (b *baseReporter) ExecutableMetadata(args *ExecutableMetadataArgs) { }) } -func (*baseReporter) SupportsReportTraceEvent() bool { return true } - -func (b *baseReporter) ReportTraceEvent(trace *libpf.Trace, meta *samples.TraceEventMeta) { +func (b *baseReporter) ReportTraceEvent(trace *libpf.Trace, meta *samples.TraceEventMeta) error { if meta.Origin != support.TraceOriginSampling && meta.Origin != support.TraceOriginOffCPU { // At the moment only on-CPU and off-CPU traces are reported. 
- log.Errorf("Skip reporting trace for unexpected %d origin", meta.Origin) - return + return fmt.Errorf("skip reporting trace for %d origin: %w", meta.Origin, + errUnknownOrigin) } var extraMeta any @@ -131,7 +126,7 @@ func (b *baseReporter) ReportTraceEvent(trace *libpf.Trace, meta *samples.TraceE events.Timestamps = append(events.Timestamps, uint64(meta.Timestamp)) events.OffTimes = append(events.OffTimes, meta.OffTime) (*traceEventsMap)[meta.Origin][key] = events - return + return nil } (*traceEventsMap)[meta.Origin][key] = &samples.TraceEvents{ @@ -143,7 +138,9 @@ func (b *baseReporter) ReportTraceEvent(trace *libpf.Trace, meta *samples.TraceE MappingFileOffsets: trace.MappingFileOffsets, Timestamps: []uint64{uint64(meta.Timestamp)}, OffTimes: []int64{meta.OffTime}, + EnvVars: meta.EnvVars, } + return nil } func (b *baseReporter) FrameMetadata(args *FrameMetadataArgs) { diff --git a/reporter/collector_reporter_test.go b/reporter/collector_reporter_test.go index 1701faad0..d5807d465 100644 --- a/reporter/collector_reporter_test.go +++ b/reporter/collector_reporter_test.go @@ -57,7 +57,10 @@ func TestCollectorReporterReportTraceEvent(t *testing.T) { CGroupCacheElements: 1, }, next) require.NoError(t, err) - r.ReportTraceEvent(tt.trace, tt.meta) + if err := r.ReportTraceEvent(tt.trace, tt.meta); err != nil && + !errors.Is(err, errUnknownOrigin) { + t.Fatal(err) + } }) } } diff --git a/reporter/iface.go b/reporter/iface.go index 679a61e6d..695b283ec 100644 --- a/reporter/iface.go +++ b/reporter/iface.go @@ -30,22 +30,10 @@ type Reporter interface { } type TraceReporter interface { - // ReportFramesForTrace accepts a trace with the corresponding frames - // and caches this information before a periodic reporting to the backend. - ReportFramesForTrace(trace *libpf.Trace) - - // ReportCountForTrace accepts a hash of a trace with a corresponding count and - // caches this information before a periodic reporting to the backend. 
- ReportCountForTrace(traceHash libpf.TraceHash, count uint16, meta *samples.TraceEventMeta) - - // ReportTraceEvent accepts a trace event (trace metadata with frames and counts) - // and caches it for reporting to the backend. It returns true if the event was - // enqueued for reporting, and false if the event was ignored. - ReportTraceEvent(trace *libpf.Trace, meta *samples.TraceEventMeta) - - // SupportsReportTraceEvent returns true if the reporter supports reporting trace events - // via ReportTraceEvent(). - SupportsReportTraceEvent() bool + // ReportTraceEvent accepts a trace event (trace metadata with frames) + // and enqueues it for reporting to the backend. + // If handling the trace event fails it returns an error. + ReportTraceEvent(trace *libpf.Trace, meta *samples.TraceEventMeta) error } // ExecutableOpener is a function that attempts to open an executable. diff --git a/reporter/internal/pdata/generate.go b/reporter/internal/pdata/generate.go index 6570abc30..0e55366bc 100644 --- a/reporter/internal/pdata/generate.go +++ b/reporter/internal/pdata/generate.go @@ -12,7 +12,9 @@ import ( log "github.com/sirupsen/logrus" "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/pprofile" - semconv "go.opentelemetry.io/otel/semconv/v1.4.0" + "go.opentelemetry.io/otel/attribute" + + semconv "go.opentelemetry.io/otel/semconv/v1.30.0" "go.opentelemetry.io/ebpf-profiler/libpf" "go.opentelemetry.io/ebpf-profiler/reporter/samples" @@ -118,7 +120,7 @@ func (p *Pdata) setProfile( loc := profile.LocationTable().AppendEmpty() loc.SetAddress(uint64(traceInfo.Linenos[i])) attrMgr.AppendOptionalString(loc.AttributeIndices(), - "profile.frame.type", traceInfo.FrameTypes[i].String()) + semconv.ProfileFrameTypeKey, traceInfo.FrameTypes[i].String()) switch frameKind := traceInfo.FrameTypes[i]; frameKind { case libpf.NativeFrame: @@ -153,9 +155,11 @@ func (p *Pdata) setProfile( // semantic convention for build_id, replace these hard coded // strings. 
attrMgr.AppendOptionalString(mapping.AttributeIndices(), - "process.executable.build_id.gnu", ei.GnuBuildID) + semconv.ProcessExecutableBuildIDGnuKey, + ei.GnuBuildID) attrMgr.AppendOptionalString(mapping.AttributeIndices(), - "process.executable.build_id.htlhash", traceInfo.Files[i].StringNoQuotes()) + semconv.ProcessExecutableBuildIDHtlhashKey, + traceInfo.Files[i].StringNoQuotes()) } loc.SetMappingIndex(locationMappingIndex) case libpf.AbortFrame: @@ -219,6 +223,13 @@ func (p *Pdata) setProfile( attrMgr.AppendInt(sample.AttributeIndices(), semconv.ProcessPIDKey, traceKey.Pid) + for key, value := range traceInfo.EnvVars { + attrMgr.AppendOptionalString( + sample.AttributeIndices(), + attribute.Key("env."+key), + value) + } + if p.ExtraSampleAttrProd != nil { extra := p.ExtraSampleAttrProd.ExtraSampleAttrs(attrMgr, traceKey.ExtraMeta) sample.AttributeIndices().Append(extra...) @@ -306,6 +317,7 @@ func getDummyMappingIndex(fileIDtoMapping map[libpf.FileID]int32, mapping := profile.MappingTable().AppendEmpty() mapping.SetFilenameStrindex(getStringMapIndex(stringMap, "")) attrMgr.AppendOptionalString(mapping.AttributeIndices(), - "process.executable.build_id.htlhash", fileID.StringNoQuotes()) + semconv.ProcessExecutableBuildIDHtlhashKey, + fileID.StringNoQuotes()) return locationMappingIndex } diff --git a/reporter/otlp_reporter.go b/reporter/otlp_reporter.go index 6c2e3fccb..a0d005573 100644 --- a/reporter/otlp_reporter.go +++ b/reporter/otlp_reporter.go @@ -14,7 +14,7 @@ import ( log "github.com/sirupsen/logrus" "go.opentelemetry.io/collector/pdata/pprofile" "go.opentelemetry.io/collector/pdata/pprofile/pprofileotlp" - semconv "go.opentelemetry.io/otel/semconv/v1.22.0" + semconv "go.opentelemetry.io/otel/semconv/v1.30.0" "google.golang.org/grpc" "google.golang.org/grpc/credentials" "google.golang.org/grpc/credentials/insecure" diff --git a/reporter/samples/attrmgr_test.go b/reporter/samples/attrmgr_test.go index 34e82ddca..bd03376b4 100644 --- 
a/reporter/samples/attrmgr_test.go +++ b/reporter/samples/attrmgr_test.go @@ -12,7 +12,7 @@ import ( "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/pprofile" "go.opentelemetry.io/ebpf-profiler/libpf" - semconv "go.opentelemetry.io/otel/semconv/v1.4.0" + semconv "go.opentelemetry.io/otel/semconv/v1.30.0" ) type attributeStruct struct { diff --git a/reporter/samples/samples.go b/reporter/samples/samples.go index 54b92d3a3..466c5ff67 100644 --- a/reporter/samples/samples.go +++ b/reporter/samples/samples.go @@ -15,6 +15,7 @@ type TraceEventMeta struct { CPU int Origin libpf.Origin OffTime int64 + EnvVars map[string]string } // TraceEvents holds known information about a trace. @@ -27,6 +28,7 @@ type TraceEvents struct { MappingFileOffsets []uint64 Timestamps []uint64 // in nanoseconds OffTimes []int64 // in nanoseconds + EnvVars map[string]string } // TraceAndMetaKey is the deduplication key for samples. This **must always** @@ -44,6 +46,7 @@ type TraceAndMetaKey struct { ProcessName string // Executable path is retrieved from /proc/PID/exe ExecutablePath string + // ExtraMeta stores extra meta info that may have been produced by a // `SampleAttrProducer` instance. May be nil. ExtraMeta any diff --git a/rust-crates/symblib-capi/c/symblib.h b/rust-crates/symblib-capi/c/symblib.h index df3c2be0a..24141aa11 100644 --- a/rust-crates/symblib-capi/c/symblib.h +++ b/rust-crates/symblib-capi/c/symblib.h @@ -138,6 +138,44 @@ extern SymblibStatus symblib_retpadextr_submit( // Frees a return pad extractor. extern void symblib_retpadextr_free(SymblibRetPadExtractor* extr); +// Opaque handle to SymblibPointResolver. +typedef struct SymblibPointResolver SymblibPointResolver; + +// Creates a new SymblibPointResolver. +extern SymblibStatus symblib_goruntime_new( + const char* executable, + SymblibPointResolver** runtime // out arg +); + +// Frees a SymblibPointResolver. 
+extern void symblib_goruntime_free(SymblibPointResolver* runtime); + +// Contains information about a symbol and its origin. +typedef struct SymblibResolvedSymbol { + uint64_t start_addr; + SymblibString function_name; + SymblibString file_name; + uint32_t line_number; +} SymblibResolvedSymbol; + +// Enveloping struct that contains len number of symbols in data. +typedef struct SymblibSlice_SymblibResolvedSymbol { + const SymblibResolvedSymbol* data; + size_t len; +} SymblibSlice_SymblibResolvedSymbol; + +// Single point lookup for pc using SymblibPointResolver. +SymblibStatus symblib_point_resolver_symbols_for_pc( + const SymblibPointResolver* resolver, + uint64_t pc, + SymblibSlice_SymblibResolvedSymbol** symbols // out arg +); + +// Frees a SymblibSlice_SymblibResolvedSymbol. +void symblib_slice_symblibresolved_symbol_free( + SymblibSlice_SymblibResolvedSymbol* slice +); + #ifdef __cplusplus } #endif diff --git a/rust-crates/symblib-capi/src/gosym.rs b/rust-crates/symblib-capi/src/gosym.rs new file mode 100644 index 000000000..186388faf --- /dev/null +++ b/rust-crates/symblib-capi/src/gosym.rs @@ -0,0 +1,67 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +use crate::{FfiResult, StatusCode, SymblibPointResolver}; +use std::ffi::{c_char, CStr}; +use std::mem; +use std::path::Path; +use symblib::symbconv::{PointResolver, ResolvedSymbol, Result as SymconvResult}; +use symblib::{gosym::GoRuntimeInfo, objfile}; + +pub struct SymblibGoRuntime { + #[allow(unused)] + obj: Box, + runtime: GoRuntimeInfo<'static>, +} + +impl PointResolver for SymblibGoRuntime { + fn symbols_for_pc(&self, pc: symblib::VirtAddr) -> SymconvResult> { + self.runtime.symbols_for_pc(pc) + } +} + +#[no_mangle] +pub unsafe extern "C" fn symblib_goruntime_new( + executable: *const c_char, + runtime: *mut *mut SymblibPointResolver, +) -> StatusCode { + match goruntime_new_impl(executable, runtime) { + Ok(()) => StatusCode::Ok, + Err(e) => e, + } +} + +unsafe fn 
goruntime_new_impl( + executable: *const c_char, + runtime: *mut *mut SymblibPointResolver, +) -> FfiResult { + let executable = CStr::from_ptr(executable) + .to_str() + .map(Path::new) + .map_err(|_| StatusCode::BadUtf8)?; + + let obj = Box::new(objfile::File::load(executable)?); + let obj_reader = obj.parse()?; + let go_runtime = GoRuntimeInfo::open(&obj_reader)?; + + // Transmute away lifetime to allow for self-referential struct. + let go_runtime: GoRuntimeInfo<'static> = mem::transmute(go_runtime); + + let resolver = SymblibGoRuntime { + obj, + runtime: go_runtime, + }; + + let point_resolver = Box::new(SymblibPointResolver::new( + Box::new(resolver) as Box + )); + *runtime = Box::into_raw(point_resolver); + Ok(()) +} + +#[no_mangle] +pub unsafe extern "C" fn symblib_goruntime_free(runtime: *mut SymblibPointResolver) { + if !runtime.is_null() { + drop(Box::from_raw(runtime)); + } +} diff --git a/rust-crates/symblib-capi/src/lib.rs b/rust-crates/symblib-capi/src/lib.rs index b6664ad70..fda7b8716 100644 --- a/rust-crates/symblib-capi/src/lib.rs +++ b/rust-crates/symblib-capi/src/lib.rs @@ -5,12 +5,16 @@ mod ffislice; mod ffistr; +mod gosym; +mod pointresolver; mod rangeextr; mod retpadextr; mod status; pub use ffislice::*; pub use ffistr::*; +pub use gosym::*; +pub use pointresolver::*; pub use rangeextr::*; pub use retpadextr::*; pub use status::*; diff --git a/rust-crates/symblib-capi/src/pointresolver.rs b/rust-crates/symblib-capi/src/pointresolver.rs new file mode 100644 index 000000000..9d4db88ec --- /dev/null +++ b/rust-crates/symblib-capi/src/pointresolver.rs @@ -0,0 +1,66 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +use crate::{StatusCode, SymblibSlice, SymblibString}; +use symblib::symbconv; +use symblib::symbconv::PointResolver; +use symblib::VirtAddr; + +#[repr(C)] +#[derive(Debug)] +pub struct SymblibResolvedSymbol { + pub start_addr: VirtAddr, + pub function_name: SymblibString, + pub file_name: 
SymblibString, // may be empty + pub line_number: u32, // 0 = unknown +} + +impl From for SymblibResolvedSymbol { + fn from(sym: symbconv::ResolvedSymbol) -> Self { + Self { + start_addr: sym.start_addr, + function_name: sym.function_name.into(), + file_name: sym.file_name.unwrap_or("".to_string()).into(), + line_number: sym.line_number.unwrap_or(0), + } + } +} + +#[repr(C)] +pub struct SymblibPointResolver { + inner: Box, +} + +impl SymblibPointResolver { + pub fn new(resolver: Box) -> Self { + Self { inner: resolver } + } +} + +#[no_mangle] +pub extern "C" fn symblib_point_resolver_symbols_for_pc( + resolver: &SymblibPointResolver, + pc: VirtAddr, + out_symbols: *mut *mut SymblibSlice, +) -> StatusCode { + let symbols: Vec<_> = match resolver.inner.symbols_for_pc(pc) { + Ok(syms) => syms.into_iter().map(Into::into).collect(), + Err(e) => return StatusCode::from(e), + }; + + unsafe { + *out_symbols = Box::into_raw(Box::new(symbols.into())); + } + StatusCode::Ok +} + +#[no_mangle] +pub extern "C" fn symblib_slice_symblibresolved_symbol_free( + slice: *mut SymblibSlice, +) { + if !slice.is_null() { + unsafe { + drop(Box::from_raw(slice)); + } + } +} diff --git a/rust-crates/symblib-capi/src/status.rs b/rust-crates/symblib-capi/src/status.rs index 54686aa2f..8ecfb69f2 100644 --- a/rust-crates/symblib-capi/src/status.rs +++ b/rust-crates/symblib-capi/src/status.rs @@ -4,7 +4,7 @@ //! Defines FFI error codes and their conversion from Rust error types. 
use std::io; -use symblib::{dwarf, objfile, retpads, symbconv}; +use symblib::{dwarf, gosym, objfile, retpads, symbconv}; pub type FfiResult = Result; @@ -42,6 +42,9 @@ pub enum StatusCode { #[error("The channel was already closed in a previous call")] AlreadyClosed = 8, + + #[error("Point resolver error")] + PointResolver = 9, } impl From for FfiResult { @@ -117,3 +120,9 @@ impl From for StatusCode { Self::Retpad } } + +impl From for StatusCode { + fn from(_: gosym::Error) -> Self { + StatusCode::Symbconv + } +} diff --git a/rust-crates/symblib/src/gosym/mod.rs b/rust-crates/symblib/src/gosym/mod.rs index 6f8394a4d..3fe129823 100644 --- a/rust-crates/symblib/src/gosym/mod.rs +++ b/rust-crates/symblib/src/gosym/mod.rs @@ -17,7 +17,7 @@ mod errors; pub use errors::*; mod raw; -use crate::{objfile, VirtAddr}; +use crate::{objfile, symbconv, VirtAddr}; use fallible_iterator::FallibleIterator; use std::ops::Range; @@ -540,3 +540,49 @@ fn range_rel2abs(base: VirtAddr, rng: Range) -> Range { + /// NOTE: this currently doesn't support inline functions + fn symbols_for_pc(&self, pc: VirtAddr) -> symbconv::Result> { + let func = match self.find_func(pc) { + Ok(Some(func)) => func, + Ok(None) => return Ok(Vec::new()), + Err(e) => return Err(symbconv::Error::Go(symbconv::go::Error::Gosym(e))), + }; + + let mut symbols = Vec::new(); + let mut source_file = None; + let mut line_number = None; + + // For file mappings + let mut file_iter = func + .file_mapping() + .map_err(|e| symbconv::Error::Go(symbconv::go::Error::Gosym(e)))?; + while let Ok(Some((range, file))) = file_iter.next() { + if range.contains(&VirtAddr::from(pc)) { + source_file = Some(file.unwrap_or("").into()); + break; + } + } + + // For line mappings + let mut line_iter = func + .line_mapping() + .map_err(|e| symbconv::Error::Go(symbconv::go::Error::Gosym(e)))?; + while let Ok(Some((range, line))) = line_iter.next() { + if range.contains(&VirtAddr::from(pc)) { + line_number = Some(line.unwrap_or(0)); + break; 
+ } + } + + symbols.push(symbconv::ResolvedSymbol { + start_addr: func.start_addr(), + function_name: func.name().ok().map(|s| s.to_string()), + file_name: source_file, + line_number: line_number, + }); + + Ok(symbols) + } +} diff --git a/rust-crates/symblib/src/symbconv/mod.rs b/rust-crates/symblib/src/symbconv/mod.rs index 4708a8426..dcb68a136 100644 --- a/rust-crates/symblib/src/symbconv/mod.rs +++ b/rust-crates/symblib/src/symbconv/mod.rs @@ -3,7 +3,7 @@ //! Extract symbol info and convert it to [`symbfile`] format. -use crate::{objfile, symbfile, AnyError}; +use crate::{objfile, symbfile, AnyError, VirtAddr}; use std::io; /// Result type shorthand. @@ -79,6 +79,27 @@ pub trait RangeExtractor { } } +/// Holds information about a symbol and its origin. +pub struct ResolvedSymbol { + /// Start address of a symbol + pub start_addr: VirtAddr, + /// Function name associated with an address. + pub function_name: Option, + /// File name that holds this function. + pub file_name: Option, + /// Line number associated with this virtual address. + pub line_number: Option, +} + +/// Common interface to resolve symbols for a specific program counter address. +pub trait PointResolver { + /// Returns all symbols that match the given program counter address. 
+ /// + /// The returned vector contains all resolved symbols at the given address, + /// which can include both the direct function and any inline frames + fn symbols_for_pc(&self, pc: VirtAddr) -> Result>; +} + fn _assert_obj_safe(_: &dyn RangeExtractor) {} pub mod dwarf; diff --git a/target/aarch64-unknown-linux-musl/release/libsymblib_capi.a b/target/aarch64-unknown-linux-musl/release/libsymblib_capi.a new file mode 100644 index 000000000..a4491dc0e Binary files /dev/null and b/target/aarch64-unknown-linux-musl/release/libsymblib_capi.a differ diff --git a/target/x86_64-unknown-linux-musl/release/libsymblib_capi.a b/target/x86_64-unknown-linux-musl/release/libsymblib_capi.a new file mode 100644 index 000000000..151c8dbc1 Binary files /dev/null and b/target/x86_64-unknown-linux-musl/release/libsymblib_capi.a differ diff --git a/testutils/helpers.go b/testutils/helpers.go index 075a9154b..7e8c8d2ab 100644 --- a/testutils/helpers.go +++ b/testutils/helpers.go @@ -112,6 +112,9 @@ func readTracePipe(ctx context.Context) { if errors.Is(err, io.EOF) { continue } + if errors.Is(err, os.ErrClosed) { + return + } log.Error(err) return } diff --git a/tools/coredump/coredump.go b/tools/coredump/coredump.go index 206452f68..042786c98 100644 --- a/tools/coredump/coredump.go +++ b/tools/coredump/coredump.go @@ -189,7 +189,8 @@ func ExtractTraces(ctx context.Context, pr process.Process, debug bool, includeTracers, _ := tracertypes.Parse("all") manager, err := pm.New(todo, includeTracers, monitorInterval, &coredumpEbpfMaps, - pm.NewMapFileIDMapper(), symCache, elfunwindinfo.NewStackDeltaProvider(), false, false) + pm.NewMapFileIDMapper(), symCache, elfunwindinfo.NewStackDeltaProvider(), false, false, + libpf.Set[string]{}) if err != nil { return nil, fmt.Errorf("failed to get Interpreter manager: %v", err) } diff --git a/tracehandler/metrics.go b/tracehandler/metrics.go index 055d40df5..50c8f555e 100644 --- a/tracehandler/metrics.go +++ b/tracehandler/metrics.go @@ -9,24 +9,14 
@@ func (m *traceHandler) collectMetrics() { metrics.AddSlice([]metrics.Metric{ { ID: metrics.IDTraceCacheHit, - Value: metrics.MetricValue(m.umTraceCacheHit), + Value: metrics.MetricValue(m.traceCacheHit), }, { ID: metrics.IDTraceCacheMiss, - Value: metrics.MetricValue(m.umTraceCacheMiss), - }, - { - ID: metrics.IDKnownTracesHit, - Value: metrics.MetricValue(m.bpfTraceCacheHit), - }, - { - ID: metrics.IDKnownTracesMiss, - Value: metrics.MetricValue(m.bpfTraceCacheMiss), + Value: metrics.MetricValue(m.traceCacheMiss), }, }) - m.umTraceCacheHit = 0 - m.umTraceCacheMiss = 0 - m.bpfTraceCacheHit = 0 - m.bpfTraceCacheMiss = 0 + m.traceCacheHit = 0 + m.traceCacheMiss = 0 } diff --git a/tracehandler/tracehandler.go b/tracehandler/tracehandler.go index e32cb5ef0..80441f6b6 100644 --- a/tracehandler/tracehandler.go +++ b/tracehandler/tracehandler.go @@ -8,6 +8,7 @@ package tracehandler // import "go.opentelemetry.io/ebpf-profiler/tracehandler" import ( "context" "fmt" + "sync" "time" lru "github.com/elastic/go-freelru" @@ -21,10 +22,6 @@ import ( "go.opentelemetry.io/ebpf-profiler/reporter" ) -// metadataWarnInhibDuration defines the minimum duration between warnings printed -// about failure to obtain metadata for a single PID. -const metadataWarnInhibDuration = 1 * time.Minute - // Compile time check to make sure config.Times satisfies the interfaces. var _ Times = (*times.Times)(nil) @@ -33,6 +30,10 @@ type Times interface { MonitorInterval() time.Duration } +// Default lifetime of elements in the cache to reduce recurring +// symbolization efforts. +var traceCacheLifetime = 5 * time.Minute + // TraceProcessor is an interface used by traceHandler to convert traces // from a form received from eBPF to the form we wish to dispatch to the // collection agent. @@ -58,63 +59,60 @@ type TraceProcessor interface { // from the eBPF components. 
type traceHandler struct { // Metrics - umTraceCacheHit uint64 - umTraceCacheMiss uint64 - bpfTraceCacheHit uint64 - bpfTraceCacheMiss uint64 + traceCacheHit uint64 + traceCacheMiss uint64 traceProcessor TraceProcessor - // bpfTraceCache stores mappings from BPF to user-mode hashes. This allows + // traceCache stores mappings from BPF hashes to symbolized traces. This allows // avoiding the overhead of re-doing user-mode symbolization of traces that // we have recently seen already. - bpfTraceCache *lru.LRU[host.TraceHash, libpf.TraceHash] - - // umTraceCache is a LRU set that suppresses unnecessary resends of traces - // that we have recently reported to the collector already. - umTraceCache *lru.LRU[libpf.TraceHash, libpf.Void] + traceCache *lru.SyncedLRU[host.TraceHash, libpf.Trace] // reporter instance to use to send out traces. reporter reporter.TraceReporter - // metadataWarnInhib tracks inhibitions for warnings printed about failure to - // update container metadata (rate-limiting). - metadataWarnInhib *lru.LRU[libpf.PID, libpf.Void] - times Times } // newTraceHandler creates a new traceHandler -func newTraceHandler(rep reporter.TraceReporter, traceProcessor TraceProcessor, - intervals Times, cacheSize uint32) (*traceHandler, error) { - bpfTraceCache, err := lru.New[host.TraceHash, libpf.TraceHash]( +func newTraceHandler(ctx context.Context, rep reporter.TraceReporter, + traceProcessor TraceProcessor, intervals Times, cacheSize uint32) (*traceHandler, error) { + traceCache, err := lru.NewSynced[host.TraceHash, libpf.Trace]( cacheSize, func(k host.TraceHash) uint32 { return uint32(k) }) if err != nil { return nil, err } + // Do not hold elements indefinitely in the cache. 
+ traceCache.SetLifetime(traceCacheLifetime) - umTraceCache, err := lru.New[libpf.TraceHash, libpf.Void]( - cacheSize, libpf.TraceHash.Hash32) - if err != nil { - return nil, err - } + var wg sync.WaitGroup + wg.Add(1) - metadataWarnInhib, err := lru.New[libpf.PID, libpf.Void](64, libpf.PID.Hash32) - if err != nil { - return nil, fmt.Errorf("failed to create metadata warning inhibitor LRU: %v", err) - } - metadataWarnInhib.SetLifetime(metadataWarnInhibDuration) - - t := &traceHandler{ - traceProcessor: traceProcessor, - bpfTraceCache: bpfTraceCache, - umTraceCache: umTraceCache, - reporter: rep, - times: intervals, - metadataWarnInhib: metadataWarnInhib, - } + go func() { + wg.Done() + ticker := time.NewTicker(traceCacheLifetime) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + return + case <-ticker.C: + traceCache.PurgeExpired() + } + } + }() + + // Wait to make sure the purge routine did start. + wg.Wait() - return t, nil + return &traceHandler{ + traceProcessor: traceProcessor, + traceCache: traceCache, + reporter: rep, + times: intervals, + }, nil } func (m *traceHandler) HandleTrace(bpfTrace *host.Trace) { @@ -129,19 +127,20 @@ func (m *traceHandler) HandleTrace(bpfTrace *host.Trace) { ExecutablePath: bpfTrace.ExecutablePath, Origin: bpfTrace.Origin, OffTime: bpfTrace.OffTime, + EnvVars: bpfTrace.EnvVars, } - if !m.reporter.SupportsReportTraceEvent() { - // Fast path: if the trace is already known remotely, we just send a counter update. 
- postConvHash, traceKnown := m.bpfTraceCache.Get(bpfTrace.Hash) - if traceKnown { - m.bpfTraceCacheHit++ - meta.APMServiceName = m.traceProcessor.MaybeNotifyAPMAgent(bpfTrace, postConvHash, 1) - m.reporter.ReportCountForTrace(postConvHash, 1, meta) - return + if trace, exists := m.traceCache.GetAndRefresh(bpfTrace.Hash, + traceCacheLifetime); exists { + m.traceCacheHit++ + // Fast path + meta.APMServiceName = m.traceProcessor.MaybeNotifyAPMAgent(bpfTrace, trace.Hash, 1) + if err := m.reporter.ReportTraceEvent(&trace, meta); err != nil { + log.Errorf("Failed to report trace event: %v", err) } - m.bpfTraceCacheMiss++ + return } + m.traceCacheMiss++ // Slow path: convert trace. umTrace, err := m.traceProcessor.ConvertTrace(bpfTrace) @@ -150,25 +149,12 @@ func (m *traceHandler) HandleTrace(bpfTrace *host.Trace) { panic(err) } log.Debugf("Trace hash remap 0x%x -> 0x%x", bpfTrace.Hash, umTrace.Hash) - m.bpfTraceCache.Add(bpfTrace.Hash, umTrace.Hash) + m.traceCache.Add(bpfTrace.Hash, *umTrace) meta.APMServiceName = m.traceProcessor.MaybeNotifyAPMAgent(bpfTrace, umTrace.Hash, 1) - if m.reporter.SupportsReportTraceEvent() { - m.reporter.ReportTraceEvent(umTrace, meta) - return - } - m.reporter.ReportCountForTrace(umTrace.Hash, 1, meta) - - // Trace already known to collector by UM hash? - if _, known := m.umTraceCache.Get(umTrace.Hash); known { - m.umTraceCacheHit++ - return + if err := m.reporter.ReportTraceEvent(umTrace, meta); err != nil { + log.Errorf("Failed to report trace event: %v", err) } - m.umTraceCacheMiss++ - - // Nope. Send it now. 
- m.reporter.ReportFramesForTrace(umTrace) - m.umTraceCache.Add(umTrace.Hash, libpf.Void{}) } // Start starts a goroutine that receives and processes trace updates over @@ -179,7 +165,7 @@ func Start(ctx context.Context, rep reporter.TraceReporter, traceProcessor Trace traceInChan <-chan *host.Trace, intervals Times, cacheSize uint32, ) (workerExited <-chan libpf.Void, err error) { handler, err := - newTraceHandler(rep, traceProcessor, intervals, cacheSize) + newTraceHandler(ctx, rep, traceProcessor, intervals, cacheSize) if err != nil { return nil, fmt.Errorf("failed to create traceHandler: %v", err) } diff --git a/tracehandler/tracehandler_test.go b/tracehandler/tracehandler_test.go index 945c10068..1ca980e10 100644 --- a/tracehandler/tracehandler_test.go +++ b/tracehandler/tracehandler_test.go @@ -5,10 +5,10 @@ package tracehandler_test import ( "context" + "maps" "testing" "time" - "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "go.opentelemetry.io/ebpf-profiler/host" @@ -52,48 +52,26 @@ type arguments struct { trace *host.Trace } -// reportedCount / reportedTrace hold the information reported from traceHandler -// via the reporter functions (reportCountForTrace / reportFramesForTrace). 
-type reportedCount struct { - traceHash libpf.TraceHash - count uint16 -} - -type reportedTrace struct { - traceHash libpf.TraceHash -} - type mockReporter struct { - t *testing.T - reportedCounts []reportedCount - reportedTraces []reportedTrace -} - -func (m *mockReporter) ReportFramesForTrace(trace *libpf.Trace) { - m.reportedTraces = append(m.reportedTraces, reportedTrace{traceHash: trace.Hash}) - m.t.Logf("reportFramesForTrace: new trace 0x%x", trace.Hash) -} - -func (m *mockReporter) ReportCountForTrace(traceHash libpf.TraceHash, - count uint16, _ *samples.TraceEventMeta) { - m.reportedCounts = append(m.reportedCounts, reportedCount{ - traceHash: traceHash, - count: count, - }) - m.t.Logf("reportCountForTrace: 0x%x count: %d", traceHash, count) + t *testing.T + reports map[libpf.TraceHash]uint16 } -func (m *mockReporter) SupportsReportTraceEvent() bool { return false } +func (m *mockReporter) ReportTraceEvent(trace *libpf.Trace, _ *samples.TraceEventMeta) error { + if _, exists := m.reports[trace.Hash]; exists { + m.reports[trace.Hash]++ + return nil + } + m.reports[trace.Hash] = 1 -func (m *mockReporter) ReportTraceEvent(_ *libpf.Trace, _ *samples.TraceEventMeta) { + return nil } func TestTraceHandler(t *testing.T) { tests := map[string]struct { input []arguments - expectedCounts []reportedCount - expectedTraces []reportedTrace expireTimeout time.Duration + expectedEvents map[libpf.TraceHash]uint16 }{ // no input simulates a case where no data is provided as input // to the functions of traceHandler. 
@@ -103,9 +81,8 @@ func TestTraceHandler(t *testing.T) { "single trace": {input: []arguments{ {trace: &host.Trace{Hash: host.TraceHash(0x1234)}}, }, - expectedTraces: []reportedTrace{{traceHash: libpf.NewTraceHash(0x1234, 0x1234)}}, - expectedCounts: []reportedCount{ - {traceHash: libpf.NewTraceHash(0x1234, 0x1234), count: 1}, + expectedEvents: map[libpf.TraceHash]uint16{ + libpf.NewTraceHash(0x1234, 0x1234): 1, }, }, @@ -114,10 +91,8 @@ func TestTraceHandler(t *testing.T) { {trace: &host.Trace{Hash: host.TraceHash(4)}}, {trace: &host.Trace{Hash: host.TraceHash(4)}}, }, - expectedTraces: []reportedTrace{{traceHash: libpf.NewTraceHash(4, 4)}}, - expectedCounts: []reportedCount{ - {traceHash: libpf.NewTraceHash(4, 4), count: 1}, - {traceHash: libpf.NewTraceHash(4, 4), count: 1}, + expectedEvents: map[libpf.TraceHash]uint16{ + libpf.NewTraceHash(4, 4): 2, }, }, } @@ -126,7 +101,10 @@ func TestTraceHandler(t *testing.T) { name := name test := test t.Run(name, func(t *testing.T) { - r := &mockReporter{t: t} + r := &mockReporter{ + t: t, + reports: make(map[libpf.TraceHash]uint16), + } traceChan := make(chan *host.Trace) ctx, cancel := context.WithCancel(context.Background()) @@ -142,23 +120,8 @@ func TestTraceHandler(t *testing.T) { cancel() <-exitNotify - assert.Equal(t, len(test.expectedCounts), len(r.reportedCounts)) - assert.Equal(t, len(test.expectedTraces), len(r.reportedTraces)) - - // Expected and reported traces order should match. - assert.Equal(t, test.expectedTraces, r.reportedTraces) - - for _, expCount := range test.expectedCounts { - // Expected and reported count order doesn't necessarily match. 
- found := false - for _, repCount := range r.reportedCounts { - if expCount == repCount { - found = true - break - } - } - assert.True(t, found, "Expected count %d for trace 0x%x not found", - expCount.count, expCount.traceHash) + if !maps.Equal(r.reports, test.expectedEvents) { + t.Fatalf("Expected %#v but got %#v", test.expectedEvents, r.reports) } }) } diff --git a/tracer/tracer.go b/tracer/tracer.go index 57e4d8e4d..46a4aba56 100644 --- a/tracer/tracer.go +++ b/tracer/tracer.go @@ -159,6 +159,9 @@ type Config struct { ProbabilisticThreshold uint // OffCPUThreshold is the user defined threshold for off-cpu profiling. OffCPUThreshold uint32 + // IncludeEnvVars holds a list of environment variables that should be captured and reported + // from processes + IncludeEnvVars libpf.Set[string] } // hookPoint specifies the group and name of the hooked point in the kernel. @@ -299,7 +302,7 @@ func NewTracer(ctx context.Context, cfg *Config) (*Tracer, error) { processManager, err := pm.New(ctx, cfg.IncludeTracers, cfg.Intervals.MonitorInterval(), ebpfHandler, nil, cfg.Reporter, elfunwindinfo.NewStackDeltaProvider(), - cfg.FilterErrorFrames, cfg.CollectCustomLabels) + cfg.FilterErrorFrames, cfg.CollectCustomLabels, cfg.IncludeEnvVars) if err != nil { return nil, fmt.Errorf("failed to create processManager: %v", err) } @@ -548,26 +551,24 @@ func loadAllMaps(coll *cebpf.CollectionSpec, cfg *Config, adaption := make(map[string]uint32, 4) const ( - // The following sizes X are used as 2^X, and determined empirically - + // The following sizes X are used as 2^X, and determined empirically. 
// 1 million executable pages / 4GB of executable address space - pidPageMappingInfoSize = 20 - + pidPageMappingInfoSize = 20 stackDeltaPageToInfoSize = 16 exeIDToStackDeltasSize = 16 ) adaption["pid_page_to_mapping_info"] = 1 << uint32(pidPageMappingInfoSize+cfg.MapScaleFactor) + adaption["stack_delta_page_to_info"] = 1 << uint32(stackDeltaPageToInfoSize+cfg.MapScaleFactor) - // To not loose too many scheduling events but also not oversize - // sched_times, calculate a size based on some assumptions. - // On modern systems /proc/sys/kernel/pid_max defaults to 4194304. - // Try to fit this PID space scaled down with cfg.OffCPUThreshold into - // this map. - adaption["sched_times"] = (4194304 * cfg.OffCPUThreshold) / support.OffCPUThresholdMax + // To not lose too many scheduling events but also not oversize sched_times, + // calculate a size based on an assumed upper bound of scheduler events per + // second (1000hz) multiplied by an average time a task remains off CPU (3s), + // scaled by the probability of capturing a trace. 
+ adaption["sched_times"] = (4096 * cfg.OffCPUThreshold) / support.OffCPUThresholdMax for i := support.StackDeltaBucketSmallest; i <= support.StackDeltaBucketLargest; i++ { mapName := fmt.Sprintf("exe_id_to_%d_stack_deltas", i) @@ -1004,6 +1005,7 @@ func (t *Tracer) loadBpfTrace(raw []byte, cpu int) *host.Trace { OffTime: int64(ptr.offtime), KTime: times.KTime(ptr.ktime), CPU: cpu, + EnvVars: procMeta.EnvVariables, } if trace.Origin != support.TraceOriginSampling && trace.Origin != support.TraceOriginOffCPU { diff --git a/util/util.go b/util/util.go index 26fa723f7..d29d87a91 100644 --- a/util/util.go +++ b/util/util.go @@ -5,36 +5,13 @@ package util // import "go.opentelemetry.io/ebpf-profiler/util" import ( "math/bits" - "strconv" "sync/atomic" "unicode" "unicode/utf8" - "github.com/sirupsen/logrus" - "go.opentelemetry.io/ebpf-profiler/libpf/hash" ) -// HexToUint64 is a convenience function to extract a hex string to a uint64 and -// not worry about errors. Essentially a "mustConvertHexToUint64". -func HexToUint64(str string) uint64 { - v, err := strconv.ParseUint(str, 16, 64) - if err != nil { - logrus.Fatalf("Failure to hex-convert %s to uint64: %v", str, err) - } - return v -} - -// DecToUint64 is a convenience function to extract a decimal string to a uint64 -// and not worry about errors. Essentially a "mustConvertDecToUint64". -func DecToUint64(str string) uint64 { - v, err := strconv.ParseUint(str, 10, 64) - if err != nil { - logrus.Fatalf("Failure to dec-convert %s to uint64: %v", str, err) - } - return v -} - // IsValidString checks if string is UTF-8-encoded and only contains expected characters. func IsValidString(s string) bool { if s == "" {