From a3985b820301811a902b8f168e42f956c5daa230 Mon Sep 17 00:00:00 2001 From: Edouard Schweisguth Date: Mon, 15 Dec 2025 14:43:26 +0100 Subject: [PATCH 01/35] WIP Fuzzer --- .gitignore | 3 + .../datadog/profiling/stack/CMakeLists.txt | 15 ++ .../profiling/stack/echion/echion/vm.h | 13 ++ .../profiling/stack_v2/fuzz/CMakeLists.txt | 39 ++++ .../stack_v2/fuzz/fuzz_echion_remote_read.cpp | 188 ++++++++++++++++++ docker/Dockerfile.fuzz | 43 ++++ 6 files changed, 301 insertions(+) create mode 100644 ddtrace/internal/datadog/profiling/stack_v2/fuzz/CMakeLists.txt create mode 100644 ddtrace/internal/datadog/profiling/stack_v2/fuzz/fuzz_echion_remote_read.cpp create mode 100644 docker/Dockerfile.fuzz diff --git a/.gitignore b/.gitignore index 13961fcebc7..d8e9f52319a 100644 --- a/.gitignore +++ b/.gitignore @@ -208,3 +208,6 @@ tests/appsec/iast/fixtures/taint_sinks/not_exists.txt # Rust build artifacts src/native/target* + +# Fuzzing corpus, output and artifacts +.fuzz/ \ No newline at end of file diff --git a/ddtrace/internal/datadog/profiling/stack/CMakeLists.txt b/ddtrace/internal/datadog/profiling/stack/CMakeLists.txt index 918092580b1..1158ae7db40 100644 --- a/ddtrace/internal/datadog/profiling/stack/CMakeLists.txt +++ b/ddtrace/internal/datadog/profiling/stack/CMakeLists.txt @@ -151,3 +151,18 @@ if(BUILD_TESTING) enable_testing() add_subdirectory(test) endif() + +# Fuzzing harnesses (off by default) +if(NOT DEFINED BUILD_FUZZING) + set(BUILD_FUZZING + OFF + CACHE BOOL "Build fuzzing harnesses for stack_v2/echion") +else() + set(BUILD_FUZZING + ON + CACHE BOOL "Build fuzzing harnesses for stack_v2/echion") +endif() + +if(BUILD_FUZZING) + add_subdirectory(fuzz) +endif() diff --git a/ddtrace/internal/datadog/profiling/stack/echion/echion/vm.h b/ddtrace/internal/datadog/profiling/stack/echion/echion/vm.h index cbe884294d7..5fc07fda976 100644 --- a/ddtrace/internal/datadog/profiling/stack/echion/echion/vm.h +++ b/ddtrace/internal/datadog/profiling/stack/echion/echion/vm.h 
@@ -162,8 +162,21 @@ init_safe_copy() * * @return zero on success, otherwise non-zero. */ +#if defined(ECHION_FUZZING) +// Let the fuzzing harness control the copy_memory behavior, so we can simulate "garbage" reads. +extern "C" int +echion_fuzz_copy_memory(proc_ref_t proc_ref, const void* addr, ssize_t len, void* buf); + +int +copy_memory(proc_ref_t proc_ref, const void* addr, ssize_t len, void* buf) +{ + return echion_fuzz_copy_memory(proc_ref, addr, len, buf); +} +#else +// Implementation in vm.cc int copy_memory(proc_ref_t proc_ref, const void* addr, ssize_t len, void* buf); +#endif inline pid_t pid = 0; diff --git a/ddtrace/internal/datadog/profiling/stack_v2/fuzz/CMakeLists.txt b/ddtrace/internal/datadog/profiling/stack_v2/fuzz/CMakeLists.txt new file mode 100644 index 00000000000..fe747a8582a --- /dev/null +++ b/ddtrace/internal/datadog/profiling/stack_v2/fuzz/CMakeLists.txt @@ -0,0 +1,39 @@ +cmake_minimum_required(VERSION 3.19) + +# Fuzz targets are built only when stack_v2/CMakeLists.txt has BUILD_FUZZING=ON. +# The caller is expected to provide compiler/linker flags for libFuzzer +# (e.g. -fsanitize=fuzzer,address,undefined). + +option(STACKV2_USE_LIBFUZZER "Link fuzz targets with libFuzzer (-fsanitize=fuzzer)" OFF) + +add_executable(fuzz_echion_remote_read fuzz_echion_remote_read.cpp ../src/echion/frame.cc ../src/echion/danger.cc) + +target_include_directories(fuzz_echion_remote_read PRIVATE ../include) +target_include_directories(fuzz_echion_remote_read SYSTEM PRIVATE ${Python3_INCLUDE_DIRS} ../echion ../include/vendored + ../include/util) + +# Ensure echion headers take the fuzz hook in vm.h +target_compile_definitions(fuzz_echion_remote_read PRIVATE ECHION_FUZZING) + +# When building with libFuzzer, add the fuzzer runtime only for this target. 
+if(STACKV2_USE_LIBFUZZER) + target_compile_definitions(fuzz_echion_remote_read PRIVATE FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) + target_compile_options(fuzz_echion_remote_read PRIVATE -fsanitize=fuzzer,address,undefined -fno-omit-frame-pointer) + target_link_options(fuzz_echion_remote_read PRIVATE -fsanitize=fuzzer,address,undefined) +endif() + +# Echion sources need to be given the current platform +if(APPLE) + target_compile_definitions(fuzz_echion_remote_read PRIVATE PL_DARWIN) +elseif(UNIX) + target_compile_definitions(fuzz_echion_remote_read PRIVATE PL_LINUX) +endif() + +# Use the same ddup config helper for sanitizer/rpath defaults. +add_ddup_config(fuzz_echion_remote_read) + +if(Python3_LIBRARIES) + target_link_libraries(fuzz_echion_remote_read PRIVATE ${Python3_LIBRARIES}) +endif() + + diff --git a/ddtrace/internal/datadog/profiling/stack_v2/fuzz/fuzz_echion_remote_read.cpp b/ddtrace/internal/datadog/profiling/stack_v2/fuzz/fuzz_echion_remote_read.cpp new file mode 100644 index 00000000000..eb60732f3e7 --- /dev/null +++ b/ddtrace/internal/datadog/profiling/stack_v2/fuzz/fuzz_echion_remote_read.cpp @@ -0,0 +1,188 @@ +// Fuzz harness (raw): treat libFuzzer input bytes as a remote memory image and +// let echion attempt to interpret *any* garbage. +// +// This is intentionally a "minimal structure" harness: we do not synthesize +// valid CPython object layouts. Instead, we pass fuzz-derived remote addresses +// into echion APIs and rely on echion's own size caps (e.g. MAX_MIRROR_SIZE, +// MAX_STRING_SIZE) to keep the harness stable. + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#if PY_VERSION_HEX >= 0x030b0000 +#include +#endif + +#if PY_VERSION_HEX >= 0x030a0000 +// Expose PyCodeObject fields for local stack allocation and field assignment. +// Echion itself uses internal headers; the fuzz harness does the same. 
+#ifndef Py_BUILD_CORE +#define Py_BUILD_CORE +#endif +#include +#endif + +namespace { + +static constexpr uintptr_t kRemoteBase = 0x10000000ULL; + +static thread_local const uint8_t* g_data = nullptr; +static thread_local size_t g_size = 0; + +static inline uintptr_t +addr_from_u64(uint64_t v) +{ + if (g_size == 0) { + return kRemoteBase; + } + return kRemoteBase + static_cast(v % g_size); +} + +static inline uint64_t +load_u64_le(const uint8_t* data, size_t size, size_t off) +{ + uint64_t v = 0; + if (off >= size) { + return 0; + } + const size_t n = std::min(8, size - off); + std::memcpy(&v, data + off, n); + return v; +} + +static inline int +load_int_le(const uint8_t* data, size_t size, size_t off) +{ + int v = 0; + if (off >= size) { + return 0; + } + const size_t n = std::min(4, size - off); + std::memcpy(&v, data + off, n); + return v; +} + +} // namespace + +extern "C" int +echion_fuzz_copy_memory(proc_ref_t proc_ref, const void* addr, ssize_t len, void* buf) +{ + (void)proc_ref; + + // Return 0 on success, non-zero on failure (matches copy_memory contract). + if (!g_data || !buf || len < 0) { + return -1; + } + + // Keep individual reads bounded to avoid pathological slow paths. + static constexpr size_t kMaxCopy = 2U << 20; // 2 MiB + if (static_cast(len) > kMaxCopy) { + return -1; + } + + uintptr_t a = reinterpret_cast(addr); + if (a >= kRemoteBase) { + size_t off = static_cast(a - kRemoteBase); + if (off + static_cast(len) <= g_size) { + std::memcpy(buf, g_data + off, static_cast(len)); + return 0; + } + } + + return -1; +} + +extern "C" int +LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) +{ + g_data = data; + g_size = size; + + if (size == 0) { + return 0; + } + + // Pick fuzz generated values. 
+ // pointers: "remote address" inside the input data + uintptr_t p0 = addr_from_u64(load_u64_le(data, size, 0)); + uintptr_t p1 = addr_from_u64(load_u64_le(data, size, 8)); + uintptr_t p2 = addr_from_u64(load_u64_le(data, size, 16)); + + // lasti: last instruction index, used by Frame::create() + int lasti = load_int_le(data, size, 24); + +#if PY_VERSION_HEX >= 0x030b0000 + { + StackChunk sc; + (void)sc.update(reinterpret_cast<_PyStackChunk*>(p0)); + (void)sc.resolve(reinterpret_cast(p1)); + } +#endif + + { + // Create a *local* PyCodeObject with pointers to arbitrary remote garbage. + // Frame::create() will attempt to read those remote objects via copy_type/copy_generic. + PyCodeObject code{}; + code.co_firstlineno = 1; + code.co_filename = reinterpret_cast(p0); + +#if PY_VERSION_HEX >= 0x030b0000 + code.co_qualname = reinterpret_cast(p1); + code.co_linetable = reinterpret_cast(p2); +#elif PY_VERSION_HEX >= 0x030a0000 + code.co_name = reinterpret_cast(p1); + code.co_linetable = reinterpret_cast(p2); +#else + code.co_name = reinterpret_cast(p1); + code.co_lnotab = reinterpret_cast(p2); +#endif + + // CORE of the fuzz harness + (void)Frame::create(&code, lasti); + // TODO: Call more internal functions to trigger more code paths + // Possible ideas: + // - MirrorSet::* + // - StackChunk::* + // - ThreadInfo::* + } + + g_data = nullptr; + g_size = 0; + return 0; +} + +#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION +// Standalone entrypoint for quick sanity runs without linking libFuzzer. +// When building with libFuzzer, the fuzzer runtime provides `main()`. 
+#include +#include + +int +main(int argc, char** argv) +{ + if (argc != 2) { + std::cerr << "Usage: " << argv[0] << " \n"; + return 2; + } + + std::ifstream f(argv[1], std::ios::binary); + if (!f) { + std::cerr << "Failed to open input file\n"; + return 2; + } + + std::vector data((std::istreambuf_iterator(f)), std::istreambuf_iterator()); + (void)LLVMFuzzerTestOneInput(data.data(), data.size()); + return 0; +} +#endif + + diff --git a/docker/Dockerfile.fuzz b/docker/Dockerfile.fuzz new file mode 100644 index 00000000000..00f29465b9f --- /dev/null +++ b/docker/Dockerfile.fuzz @@ -0,0 +1,43 @@ +# Minimal fuzzing image for the stack_v2/echion harness (libFuzzer + ASAN/UBSAN). +# +# Build: +# $ docker build -f docker/Dockerfile.fuzz -t ddtrace-py-stackv2-fuzz . +# Run: +# $ docker run --rm -it -v "$PWD/.fuzz:/fuzz" ddtrace-py-stackv2-fuzz + +FROM debian:trixie-slim + +ENV DEBIAN_FRONTEND=noninteractive + +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + ca-certificates \ + clang \ + cmake \ + git \ + libclang-rt-dev \ + lld \ + make \ + ninja-build \ + python3 \ + python3-dev \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /src +COPY . /src + +# Build only the fuzz target; skip the Python extension (avoids libdd_wrapper dependency). 
+RUN cmake -S ddtrace/internal/datadog/profiling/stack_v2 -B /build \ + -DBUILD_FUZZING=ON -DBUILD_TESTING=OFF -DSTACKV2_USE_LIBFUZZER=ON \ + -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ \ + -DCMAKE_BUILD_TYPE=RelWithDebInfo \ + -DCMAKE_C_FLAGS="-O1 -g -fno-omit-frame-pointer -fsanitize=address,undefined" \ + -DCMAKE_CXX_FLAGS="-O1 -g -fno-omit-frame-pointer -fsanitize=address,undefined" \ + -DCMAKE_EXE_LINKER_FLAGS="-fsanitize=address,undefined" \ + && cmake --build /build -j --target fuzz_echion_remote_read + +# RUN mkdir -p /fuzz/corpus /fuzz/out + +CMD ["/build/fuzz/fuzz_echion_remote_read", "/fuzz/", "-artifact_prefix=/fuzz/"] + + From d669a178347df51b4f252d72d79904368e806343 Mon Sep 17 00:00:00 2001 From: Edouard Schweisguth Date: Wed, 17 Dec 2025 17:24:29 +0100 Subject: [PATCH 02/35] onboard to internal fuzzing infra --- .gitlab-ci.yml | 2 + .gitlab/fuzz.yml | 29 +++ .gitlab/scripts/fuzz_infra.py | 243 ++++++++++++++++++ .../{stack_v2 => stack}/fuzz/CMakeLists.txt | 6 +- .../datadog/profiling/stack/fuzz/build.sh | 32 +++ .../fuzz/fuzz_echion_remote_read.cpp | 6 +- docker/Dockerfile.fuzz | 20 +- 7 files changed, 320 insertions(+), 18 deletions(-) create mode 100644 .gitlab/fuzz.yml create mode 100755 .gitlab/scripts/fuzz_infra.py rename ddtrace/internal/datadog/profiling/{stack_v2 => stack}/fuzz/CMakeLists.txt (88%) create mode 100755 ddtrace/internal/datadog/profiling/stack/fuzz/build.sh rename ddtrace/internal/datadog/profiling/{stack_v2 => stack}/fuzz/fuzz_echion_remote_read.cpp (99%) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index d225788f5e5..584b2a48328 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,4 +1,5 @@ stages: + - fuzz - package - tests - shared-pipeline @@ -48,6 +49,7 @@ include: - local: ".gitlab/multi-os-tests.yml" - local: ".gitlab/benchmarks/serverless.yml" - local: ".gitlab/native.yml" + - local: ".gitlab/fuzz.yml" tests-gen: stage: tests diff --git a/.gitlab/fuzz.yml b/.gitlab/fuzz.yml new file mode 100644 index 
00000000000..7f22bf4c7b0 --- /dev/null +++ b/.gitlab/fuzz.yml @@ -0,0 +1,29 @@ +variables: + REPO_LANG: python # "python" is used everywhere rather than "py" + # CI_DEBUG_SERVICES: "true" + +fuzz_infra: + image: + name: registry.ddbuild.io/images/mirror/ubuntu:24.04 + tags: ["arch:amd64"] + stage: fuzz + timeout: 30m + allow_failure: true + rules: + # runs on gitlab schedule and on merge to main. + # Also allow manual run in branches for ease of debug / testing + - if: '$CI_COMMIT_BRANCH == "main" && $CI_PIPELINE_SOURCE == "schedule"' + allow_failure: true + - if: $CI_COMMIT_BRANCH == "main" + allow_failure: true + - when: manual + allow_failure: true + before_script: + # Install build dependencies (same as docker/Dockerfile.fuzz) + - apt-get update && apt-get install -y --no-install-recommends ca-certificates clang cmake git libclang-rt-dev lld make ninja-build python3 python3-dev python3-pip curl unzip + - python3 -m pip install requests --break-system-packages + # Install vault for fuzzing API authentication + - VAULT_VERSION=1.21.1 && curl -fsSL "https://releases.hashicorp.com/vault/${VAULT_VERSION}/vault_${VAULT_VERSION}_linux_amd64.zip" -o vault.zip && unzip vault.zip && mv vault /usr/local/bin/vault && rm vault.zip && chmod +x /usr/local/bin/vault + - git config --global --add safe.directory ${CI_PROJECT_DIR} + script: + - python3 .gitlab/scripts/fuzz_infra.py diff --git a/.gitlab/scripts/fuzz_infra.py b/.gitlab/scripts/fuzz_infra.py new file mode 100755 index 00000000000..85fc605babe --- /dev/null +++ b/.gitlab/scripts/fuzz_infra.py @@ -0,0 +1,243 @@ +#!/usr/bin/env python3 + +# This script enables "0 click onboarding" for new fuzzer in the dd-trace-py repository. +# This means that any new fuzzer should be automatically detected and run in the internal infrastructure with enrichments, reporting, triaging, auto fix etc.. 
+# Reports are submitted via Slack, with the channel defined by SLACK_CHANNEL +# +# Requirements: +# +# This scripts assumes that: +# - Each fuzz target is built in a separate build directory named `fuzz` and having a `build.sh` script that builds the target. +# - The build script appends the path to the built binary to a "MANIFEST_FILE", allowing the discovery of each fuzz target by the script. + +from __future__ import annotations + +from dataclasses import dataclass +import glob +import os +import subprocess +import sys +from typing import List + +import requests + +# TODO: replace me to dd-trace-py ops' slack channel once initial onboarding is done +SLACK_CHANNEL = "fuzzing-ops" +TEAM_NAME = "profiling-python" +REPOSITORY_URL = "https://github.com/DataDog/dd-trace-py" +PROJECT_NAME = "dd-trace-py" +# We currently only support libfuzzer for this repository. +FUZZ_TYPE = "libfuzzer" +API_URL = "https://fuzzing-api.us1.ddbuild.io/api/v1" + +# Paths and constants for script execution +REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")) +FUZZER_BINARY_BASE_PATH = "/tmp/fuzz/build" +MANIFEST_FILE = os.path.join(FUZZER_BINARY_BASE_PATH, "fuzz_binaries.txt") +MAX_PKG_NAME_LENGTH = 50 +VAULT_PATH = "vault" + + +@dataclass(frozen=True) +class FuzzBinary: + """Represents a built fuzz binary ready for upload.""" + + pkgname: str + binary_name: str + binary_path: str + + +def build_and_upload_fuzz( + team: str = TEAM_NAME, + slack_channel: str = SLACK_CHANNEL, + repository_url: str = REPOSITORY_URL, +) -> None: + git_sha = os.popen("git rev-parse HEAD").read().strip() + + # Step 1: Discover and run all build scripts + build_scripts = discover_build_scripts(REPO_ROOT) + if not build_scripts: + print(f"❌ No fuzz build scripts found under {REPO_ROOT}") + return + + # Clear any previous manifest file + if os.path.exists(MANIFEST_FILE): + os.remove(MANIFEST_FILE) + + for build_script in build_scripts: + run_build_script(build_script) + + # Step 2: 
Read the manifest file to discover built binaries + binaries = read_manifest(MANIFEST_FILE) + if not binaries: + print(f"❌ No fuzz binaries found in manifest {MANIFEST_FILE}") + return + + # Step 3: Upload and create a fuzzer for each binary + for binary in binaries: + upload_binary(binary, git_sha) + create_fuzzer(binary, git_sha, team, slack_channel, repository_url) + + print("✅ Fuzzing infrastructure setup completed successfully!") + + +def get_package_name(binary_name: str) -> str: + """ + Generate a package name for the fuzzing platform from a binary name. + It's prefixed with the repository name so it's easier to filter. + The package name is limited by k8s labels format: must be < 63 chars, alphanumeric and hyphen. + """ + return PROJECT_NAME + "-" + binary_name[:MAX_PKG_NAME_LENGTH].replace("_", "-") + + +def _is_executable(file_path: str) -> bool: + return os.path.isfile(file_path) and os.access(file_path, os.X_OK) + + +def discover_build_scripts(repo_root: str) -> List[str]: + """ + Discover fuzz build scripts by looking for '**/fuzz/build.sh' + + This allows for "0 click onboarding" for new fuzz harnesses. 
+ """ + build_scripts: List[str] = [] + for build_script in glob.glob(os.path.join(repo_root, "**/fuzz/build.sh"), recursive=True): + print(f"Found build script: {build_script}") + build_scripts.append(build_script) + return build_scripts + + +def run_build_script(build_script: str) -> None: + """Run a fuzz build script.""" + fuzz_dir = os.path.dirname(build_script) + print(f"Building fuzz directory: {fuzz_dir}") + + if not os.path.isfile(build_script): + raise FileNotFoundError(build_script) + + try: + result = subprocess.run( + [build_script], + cwd=fuzz_dir, + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + print(result.stdout) + if result.stderr: + print(result.stderr) + except subprocess.CalledProcessError as e: + print(f"❌ Build script failed with exit code {e.returncode}") + print(f"Command: {e.cmd}") + if e.stdout: + print(f"stdout:\n{e.stdout}") + if e.stderr: + print(f"stderr:\n{e.stderr}") + raise + + print(f"✅ Built fuzzers from {build_script}") + + +def read_manifest(manifest_path: str) -> List[FuzzBinary]: + """ + Read the manifest file created by build scripts to discover built binaries. + + Each build script appends its binary path(s) to this file. 
+ """ + binaries: List[FuzzBinary] = [] + + if not os.path.isfile(manifest_path): + print(f"⚠️ No manifest file found at {manifest_path}") + return binaries + + with open(manifest_path) as f: + for line in f: + binary_path = line.strip() + if not binary_path: + continue + if not os.path.isfile(binary_path): + print(f"⚠️ Binary listed in manifest not found: {binary_path}") + continue + if not _is_executable(binary_path): + print(f"⚠️ Binary listed in manifest is not executable: {binary_path}") + continue + + binary_name = os.path.basename(binary_path) + print(f"Found fuzz binary: {binary_path}") + binaries.append( + FuzzBinary( + pkgname=get_package_name(binary_name), + binary_name=binary_name, + binary_path=binary_path, + ) + ) + + return binaries + + +def create_fuzzer(binary: FuzzBinary, git_sha: str, team: str, slack_channel: str, repository_url: str) -> bool: + """Register a fuzzer with the fuzzing platform.""" + print(f"Starting fuzzer for {binary.pkgname} ({binary.binary_name})...") + run_payload = { + "app": binary.pkgname, + "debug": False, + "version": git_sha, + "type": FUZZ_TYPE, + "binary": binary.binary_name, + "team": team, + "slack_channel": slack_channel, + "repository_url": repository_url, + } + try: + response = requests.post( + f"{API_URL}/apps/{binary.pkgname}/fuzzers", headers=get_headers(), json=run_payload, timeout=30 + ) + response.raise_for_status() + print(f"✅ Started fuzzer for {binary.pkgname} ({binary.binary_name})") + print(response.json()) + except Exception as e: + print(f"❌ Failed to start fuzzer for {binary.pkgname} ({binary.binary_name}): {e}") + return True + + return False + + +def upload_binary(binary: FuzzBinary, git_sha: str) -> bool: + """Upload a fuzz binary to the fuzzing platform.""" + try: + # Get presigned URL so we can use s3 uploading + print(f"Getting presigned URL for {binary.pkgname} ({binary.binary_name})...") + presigned_response = requests.post( + f"{API_URL}/apps/{binary.pkgname}/builds/{git_sha}/url", 
headers=get_headers(), timeout=30 + ) + + presigned_response.raise_for_status() + presigned_url = presigned_response.json()["data"]["url"] + + print(f"Uploading {binary.pkgname} ({binary.binary_name}) for {git_sha}...") + with open(binary.binary_path, "rb") as f: + upload_response = requests.put(presigned_url, data=f, timeout=300) + upload_response.raise_for_status() + print(f"✅ Uploaded {binary.binary_name}") + except Exception as e: + print(f"❌ Failed to upload binary for {binary.pkgname} ({binary.binary_name}): {e}") + return True + return False + + +def get_headers(): + auth_header = ( + os.popen(f"{VAULT_PATH} read -field=token identity/oidc/token/security-fuzzing-platform").read().strip() + ) + return {"Authorization": f"Bearer {auth_header}", "Content-Type": "application/json"} + + +if __name__ == "__main__": + print("🚀 Starting fuzzing infrastructure setup...") + try: + build_and_upload_fuzz() + print("✅ Fuzzing infrastructure setup completed successfully!") + except Exception as e: + print(f"❌ Failed to set up fuzzing infrastructure: {e}") + sys.exit(1) diff --git a/ddtrace/internal/datadog/profiling/stack_v2/fuzz/CMakeLists.txt b/ddtrace/internal/datadog/profiling/stack/fuzz/CMakeLists.txt similarity index 88% rename from ddtrace/internal/datadog/profiling/stack_v2/fuzz/CMakeLists.txt rename to ddtrace/internal/datadog/profiling/stack/fuzz/CMakeLists.txt index fe747a8582a..73f268bc3da 100644 --- a/ddtrace/internal/datadog/profiling/stack_v2/fuzz/CMakeLists.txt +++ b/ddtrace/internal/datadog/profiling/stack/fuzz/CMakeLists.txt @@ -1,10 +1,10 @@ cmake_minimum_required(VERSION 3.19) -# Fuzz targets are built only when stack_v2/CMakeLists.txt has BUILD_FUZZING=ON. +# Fuzz targets are built only when stack/CMakeLists.txt has BUILD_FUZZING=ON. # The caller is expected to provide compiler/linker flags for libFuzzer # (e.g. -fsanitize=fuzzer,address,undefined). 
-option(STACKV2_USE_LIBFUZZER "Link fuzz targets with libFuzzer (-fsanitize=fuzzer)" OFF) +option(STACK_USE_LIBFUZZER "Link fuzz targets with libFuzzer (-fsanitize=fuzzer)" OFF) add_executable(fuzz_echion_remote_read fuzz_echion_remote_read.cpp ../src/echion/frame.cc ../src/echion/danger.cc) @@ -16,7 +16,7 @@ target_include_directories(fuzz_echion_remote_read SYSTEM PRIVATE ${Python3_INCL target_compile_definitions(fuzz_echion_remote_read PRIVATE ECHION_FUZZING) # When building with libFuzzer, add the fuzzer runtime only for this target. -if(STACKV2_USE_LIBFUZZER) +if(STACK_USE_LIBFUZZER) target_compile_definitions(fuzz_echion_remote_read PRIVATE FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) target_compile_options(fuzz_echion_remote_read PRIVATE -fsanitize=fuzzer,address,undefined -fno-omit-frame-pointer) target_link_options(fuzz_echion_remote_read PRIVATE -fsanitize=fuzzer,address,undefined) diff --git a/ddtrace/internal/datadog/profiling/stack/fuzz/build.sh b/ddtrace/internal/datadog/profiling/stack/fuzz/build.sh new file mode 100755 index 00000000000..52f70bdd4b6 --- /dev/null +++ b/ddtrace/internal/datadog/profiling/stack/fuzz/build.sh @@ -0,0 +1,32 @@ +#!/bin/bash +set -e + +TARGET=fuzz_echion_remote_read +BUILD_DIR=/tmp/fuzz/build +MANIFEST_FILE="${BUILD_DIR}/fuzz_binaries.txt" + +# Get the directory where this script lives, then go up one level to the stack source +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +SOURCE_DIR="$(cd "${SCRIPT_DIR}/.." 
&& pwd)" + +echo "Building fuzz target: $TARGET" +echo "Source directory: $SOURCE_DIR" + +cmake -S "${SOURCE_DIR}" -B "${BUILD_DIR}" \ + -DBUILD_FUZZING=ON -DBUILD_TESTING=OFF -DSTACK_USE_LIBFUZZER=ON \ + -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ \ + -DCMAKE_BUILD_TYPE=RelWithDebInfo \ + -DCMAKE_C_FLAGS="-O1 -g -fno-omit-frame-pointer -fsanitize=address,undefined" \ + -DCMAKE_CXX_FLAGS="-O1 -g -fno-omit-frame-pointer -fsanitize=address,undefined" \ + -DCMAKE_EXE_LINKER_FLAGS="-fsanitize=address,undefined" \ + && cmake --build "${BUILD_DIR}" -j --target $TARGET + +# Register the built binary in the manifest file for the CI infrastructure to discover +BINARY_PATH="${BUILD_DIR}/fuzz/${TARGET}" +if [ -x "${BINARY_PATH}" ]; then + echo "${BINARY_PATH}" >> "${MANIFEST_FILE}" + echo "✅ Registered binary in manifest: ${BINARY_PATH}" +else + echo "❌ Binary not found or not executable: ${BINARY_PATH}" + exit 1 +fi \ No newline at end of file diff --git a/ddtrace/internal/datadog/profiling/stack_v2/fuzz/fuzz_echion_remote_read.cpp b/ddtrace/internal/datadog/profiling/stack/fuzz/fuzz_echion_remote_read.cpp similarity index 99% rename from ddtrace/internal/datadog/profiling/stack_v2/fuzz/fuzz_echion_remote_read.cpp rename to ddtrace/internal/datadog/profiling/stack/fuzz/fuzz_echion_remote_read.cpp index eb60732f3e7..6a0e69d5a50 100644 --- a/ddtrace/internal/datadog/profiling/stack_v2/fuzz/fuzz_echion_remote_read.cpp +++ b/ddtrace/internal/datadog/profiling/stack/fuzz/fuzz_echion_remote_read.cpp @@ -147,8 +147,8 @@ LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) // CORE of the fuzz harness (void)Frame::create(&code, lasti); - // TODO: Call more internal functions to trigger more code paths - // Possible ideas: + // TODO: Call more internal functions to trigger more code paths + // Possible ideas: // - MirrorSet::* // - StackChunk::* // - ThreadInfo::* @@ -184,5 +184,3 @@ main(int argc, char** argv) return 0; } #endif - - diff --git a/docker/Dockerfile.fuzz 
b/docker/Dockerfile.fuzz index 00f29465b9f..4d8ae7265f2 100644 --- a/docker/Dockerfile.fuzz +++ b/docker/Dockerfile.fuzz @@ -21,23 +21,21 @@ RUN apt-get update \ ninja-build \ python3 \ python3-dev \ - && rm -rf /var/lib/apt/lists/* + python3-pip \ + curl \ + unzip \ + && rm -rf /var/lib/apt/lists/* \ + && python3 -m pip install requests --break-system-packages + +RUN VAULT_VERSION=1.21.1 && curl -fsSL "https://releases.hashicorp.com/vault/${VAULT_VERSION}/vault_${VAULT_VERSION}_linux_amd64.zip" -o vault.zip && unzip vault.zip && mv vault /usr/local/bin/vault && rm vault.zip && chmod +x /usr/local/bin/vault WORKDIR /src COPY . /src -# Build only the fuzz target; skip the Python extension (avoids libdd_wrapper dependency). -RUN cmake -S ddtrace/internal/datadog/profiling/stack_v2 -B /build \ - -DBUILD_FUZZING=ON -DBUILD_TESTING=OFF -DSTACKV2_USE_LIBFUZZER=ON \ - -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ \ - -DCMAKE_BUILD_TYPE=RelWithDebInfo \ - -DCMAKE_C_FLAGS="-O1 -g -fno-omit-frame-pointer -fsanitize=address,undefined" \ - -DCMAKE_CXX_FLAGS="-O1 -g -fno-omit-frame-pointer -fsanitize=address,undefined" \ - -DCMAKE_EXE_LINKER_FLAGS="-fsanitize=address,undefined" \ - && cmake --build /build -j --target fuzz_echion_remote_read +RUN /src/ddtrace/internal/datadog/profiling/stack/fuzz/build.sh # RUN mkdir -p /fuzz/corpus /fuzz/out -CMD ["/build/fuzz/fuzz_echion_remote_read", "/fuzz/", "-artifact_prefix=/fuzz/"] +CMD ["/tmp/fuzz/build/fuzz/fuzz_echion_remote_read", "/tmp/fuzz/", "-artifact_prefix=/tmp/fuzz/"] From 7b82628854554ce3e6dcd099af9978fde2adc623 Mon Sep 17 00:00:00 2001 From: Edouard Schweisguth Date: Wed, 14 Jan 2026 17:14:52 +0100 Subject: [PATCH 03/35] fix rebase errors --- .../internal/datadog/profiling/stack/echion/echion/vm.h | 2 +- .../internal/datadog/profiling/stack/fuzz/CMakeLists.txt | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/ddtrace/internal/datadog/profiling/stack/echion/echion/vm.h 
b/ddtrace/internal/datadog/profiling/stack/echion/echion/vm.h index 5fc07fda976..ea0d818954b 100644 --- a/ddtrace/internal/datadog/profiling/stack/echion/echion/vm.h +++ b/ddtrace/internal/datadog/profiling/stack/echion/echion/vm.h @@ -167,7 +167,7 @@ init_safe_copy() extern "C" int echion_fuzz_copy_memory(proc_ref_t proc_ref, const void* addr, ssize_t len, void* buf); -int +inline int copy_memory(proc_ref_t proc_ref, const void* addr, ssize_t len, void* buf) { return echion_fuzz_copy_memory(proc_ref, addr, len, buf); diff --git a/ddtrace/internal/datadog/profiling/stack/fuzz/CMakeLists.txt b/ddtrace/internal/datadog/profiling/stack/fuzz/CMakeLists.txt index 73f268bc3da..71ac8ca6071 100644 --- a/ddtrace/internal/datadog/profiling/stack/fuzz/CMakeLists.txt +++ b/ddtrace/internal/datadog/profiling/stack/fuzz/CMakeLists.txt @@ -6,7 +6,13 @@ cmake_minimum_required(VERSION 3.19) option(STACK_USE_LIBFUZZER "Link fuzz targets with libFuzzer (-fsanitize=fuzzer)" OFF) -add_executable(fuzz_echion_remote_read fuzz_echion_remote_read.cpp ../src/echion/frame.cc ../src/echion/danger.cc) +add_executable(fuzz_echion_remote_read + fuzz_echion_remote_read.cpp + ../src/echion/frame.cc + ../src/echion/danger.cc + ../src/echion/stack_chunk.cc + ../src/echion/strings.cc +) target_include_directories(fuzz_echion_remote_read PRIVATE ../include) target_include_directories(fuzz_echion_remote_read SYSTEM PRIVATE ${Python3_INCLUDE_DIRS} ../echion ../include/vendored From aa636fac76b7c5a176703d3dcacca76e5cbb9308 Mon Sep 17 00:00:00 2001 From: Edouard Schweisguth Date: Wed, 14 Jan 2026 17:18:11 +0100 Subject: [PATCH 04/35] long.cc ? 
--- ddtrace/internal/datadog/profiling/stack/fuzz/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/ddtrace/internal/datadog/profiling/stack/fuzz/CMakeLists.txt b/ddtrace/internal/datadog/profiling/stack/fuzz/CMakeLists.txt index 71ac8ca6071..da9091f79ff 100644 --- a/ddtrace/internal/datadog/profiling/stack/fuzz/CMakeLists.txt +++ b/ddtrace/internal/datadog/profiling/stack/fuzz/CMakeLists.txt @@ -12,6 +12,7 @@ add_executable(fuzz_echion_remote_read ../src/echion/danger.cc ../src/echion/stack_chunk.cc ../src/echion/strings.cc + ../src/echion/long.cc ) target_include_directories(fuzz_echion_remote_read PRIVATE ../include) From 88f1c5a5eb039eb0b73b96e9c23a0ad3d62544c3 Mon Sep 17 00:00:00 2001 From: Edouard Schweisguth Date: Wed, 14 Jan 2026 17:28:40 +0100 Subject: [PATCH 05/35] linter --- .gitlab/scripts/fuzz_infra.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab/scripts/fuzz_infra.py b/.gitlab/scripts/fuzz_infra.py index 85fc605babe..5342b0f9199 100755 --- a/.gitlab/scripts/fuzz_infra.py +++ b/.gitlab/scripts/fuzz_infra.py @@ -3,7 +3,7 @@ # This script enables "0 click onboarding" for new fuzzer in the dd-trace-py repository. # This means that any new fuzzer should be automatically detected and run in the internal infrastructure with enrichments, reporting, triaging, auto fix etc.. 
# Reports are submitted via Slack, with the channel defined by SLACK_CHANNEL -# +# # Requirements: # # This scripts assumes that: From 36d3fd8d53fbd7cd110502f54446608374f04b35 Mon Sep 17 00:00:00 2001 From: Edouard Schweisguth Date: Wed, 14 Jan 2026 17:44:36 +0100 Subject: [PATCH 06/35] format --- .gitlab/scripts/fuzz_infra.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/.gitlab/scripts/fuzz_infra.py b/.gitlab/scripts/fuzz_infra.py index 5342b0f9199..7a4eba4ce35 100755 --- a/.gitlab/scripts/fuzz_infra.py +++ b/.gitlab/scripts/fuzz_infra.py @@ -1,14 +1,17 @@ #!/usr/bin/env python3 # This script enables "0 click onboarding" for new fuzzer in the dd-trace-py repository. -# This means that any new fuzzer should be automatically detected and run in the internal infrastructure with enrichments, reporting, triaging, auto fix etc.. +# This means that any new fuzzer should be automatically detected and run in the internal +# infrastructure with enrichments, reporting, triaging, auto fix etc... # Reports are submitted via Slack, with the channel defined by SLACK_CHANNEL # # Requirements: # # This scripts assumes that: -# - Each fuzz target is built in a separate build directory named `fuzz` and having a `build.sh` script that builds the target. -# - The build script appends the path to the built binary to a "MANIFEST_FILE", allowing the discovery of each fuzz target by the script. +# - Each fuzz target is built in a separate build directory named `fuzz` and having a `build.sh` script that builds +# the target. +# - The build script appends the path to the built binary to a "MANIFEST_FILE", allowing the discovery of each fuzz +# target by the script. 
from __future__ import annotations @@ -21,6 +24,7 @@ import requests + # TODO: replace me to dd-trace-py ops' slack channel once initial onboarding is done SLACK_CHANNEL = "fuzzing-ops" TEAM_NAME = "profiling-python" From 0570e8981561d979489df088c16da3f56eca7832 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Tue, 20 Jan 2026 13:00:55 -0500 Subject: [PATCH 07/35] format CMakeLists.txt --- .../profiling/stack/fuzz/CMakeLists.txt | 21 ++++++------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/ddtrace/internal/datadog/profiling/stack/fuzz/CMakeLists.txt b/ddtrace/internal/datadog/profiling/stack/fuzz/CMakeLists.txt index da9091f79ff..fda0423c2c3 100644 --- a/ddtrace/internal/datadog/profiling/stack/fuzz/CMakeLists.txt +++ b/ddtrace/internal/datadog/profiling/stack/fuzz/CMakeLists.txt @@ -1,23 +1,16 @@ cmake_minimum_required(VERSION 3.19) -# Fuzz targets are built only when stack/CMakeLists.txt has BUILD_FUZZING=ON. -# The caller is expected to provide compiler/linker flags for libFuzzer -# (e.g. -fsanitize=fuzzer,address,undefined). - +# Fuzz targets are built only when stack/CMakeLists.txt has BUILD_FUZZING=ON. The caller is expected to provide +# compiler/linker flags for libFuzzer (e.g. -fsanitize=fuzzer,address,undefined). 
option(STACK_USE_LIBFUZZER "Link fuzz targets with libFuzzer (-fsanitize=fuzzer)" OFF) -add_executable(fuzz_echion_remote_read - fuzz_echion_remote_read.cpp - ../src/echion/frame.cc - ../src/echion/danger.cc - ../src/echion/stack_chunk.cc - ../src/echion/strings.cc - ../src/echion/long.cc -) +add_executable( + fuzz_echion_remote_read ../src/echion/danger.cc ../src/echion/frame.cc ../src/echion/long.cc + ../src/echion/stack_chunk.cc ../src/echion/strings.cc fuzz_echion_remote_read.cpp) target_include_directories(fuzz_echion_remote_read PRIVATE ../include) target_include_directories(fuzz_echion_remote_read SYSTEM PRIVATE ${Python3_INCLUDE_DIRS} ../echion ../include/vendored - ../include/util) + ../include/util) # Ensure echion headers take the fuzz hook in vm.h target_compile_definitions(fuzz_echion_remote_read PRIVATE ECHION_FUZZING) @@ -42,5 +35,3 @@ add_ddup_config(fuzz_echion_remote_read) if(Python3_LIBRARIES) target_link_libraries(fuzz_echion_remote_read PRIVATE ${Python3_LIBRARIES}) endif() - - From 32b5f22deca9c7282f17f12480d58e343bbb3038 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Tue, 20 Jan 2026 13:26:19 -0500 Subject: [PATCH 08/35] add all echion cc files --- .../datadog/profiling/stack/fuzz/CMakeLists.txt | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/ddtrace/internal/datadog/profiling/stack/fuzz/CMakeLists.txt b/ddtrace/internal/datadog/profiling/stack/fuzz/CMakeLists.txt index fda0423c2c3..911405eed09 100644 --- a/ddtrace/internal/datadog/profiling/stack/fuzz/CMakeLists.txt +++ b/ddtrace/internal/datadog/profiling/stack/fuzz/CMakeLists.txt @@ -5,8 +5,21 @@ cmake_minimum_required(VERSION 3.19) option(STACK_USE_LIBFUZZER "Link fuzz targets with libFuzzer (-fsanitize=fuzzer)" OFF) add_executable( - fuzz_echion_remote_read ../src/echion/danger.cc ../src/echion/frame.cc ../src/echion/long.cc - ../src/echion/stack_chunk.cc ../src/echion/strings.cc fuzz_echion_remote_read.cpp) + fuzz_echion_remote_read + 
../src/echion/danger.cc + ../src/echion/frame.cc + ../src/echion/greenlets.cc + ../src/echion/interp.cc + ../src/echion/long.cc + ../src/echion/mirrors.cc + ../src/echion/stack_chunk.cc + ../src/echion/stacks.cc + ../src/echion/strings.cc + ../src/echion/tasks.cc + ../src/echion/threads.cc + ../src/echion/timing.cc + ../src/echion/vm.cc + fuzz_echion_remote_read.cpp) target_include_directories(fuzz_echion_remote_read PRIVATE ../include) target_include_directories(fuzz_echion_remote_read SYSTEM PRIVATE ${Python3_INCLUDE_DIRS} ../echion ../include/vendored From 5aa5dd73893fcdacdc78d0b8fd07b3f8f3e5b992 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Tue, 20 Jan 2026 13:34:49 -0500 Subject: [PATCH 09/35] vm.cc needs to be removed as that defines copy_memory again, leading to redefinition error --- ddtrace/internal/datadog/profiling/stack/fuzz/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/ddtrace/internal/datadog/profiling/stack/fuzz/CMakeLists.txt b/ddtrace/internal/datadog/profiling/stack/fuzz/CMakeLists.txt index 911405eed09..cde61bb8208 100644 --- a/ddtrace/internal/datadog/profiling/stack/fuzz/CMakeLists.txt +++ b/ddtrace/internal/datadog/profiling/stack/fuzz/CMakeLists.txt @@ -18,7 +18,6 @@ add_executable( ../src/echion/tasks.cc ../src/echion/threads.cc ../src/echion/timing.cc - ../src/echion/vm.cc fuzz_echion_remote_read.cpp) target_include_directories(fuzz_echion_remote_read PRIVATE ../include) From 14117e08a895a6d2196dad673dbb42921ea077c9 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Tue, 20 Jan 2026 13:45:02 -0500 Subject: [PATCH 10/35] Add a comment on Python version --- .gitlab/fuzz.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitlab/fuzz.yml b/.gitlab/fuzz.yml index 7f22bf4c7b0..a495965ed75 100644 --- a/.gitlab/fuzz.yml +++ b/.gitlab/fuzz.yml @@ -20,6 +20,10 @@ fuzz_infra: allow_failure: true before_script: # Install build dependencies (same as docker/Dockerfile.fuzz) + # TODO(taegyunkim): Fuzz with all supported versions 
of Python (3.9 - 3.14). + # On ubuntu:24.04 image, python3 version defaults to 3.12.3, meaning that + # fuzzing will only run for binary that is linked with that version of + # Python. - apt-get update && apt-get install -y --no-install-recommends ca-certificates clang cmake git libclang-rt-dev lld make ninja-build python3 python3-dev python3-pip curl unzip - python3 -m pip install requests --break-system-packages # Install vault for fuzzing API authentication From 4db133c953676634fe5d8b845b5a0c66bcc8b87c Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Tue, 20 Jan 2026 13:46:39 -0500 Subject: [PATCH 11/35] Add a comment on base image and using the same image as in .gitlab/fuzz.yml --- docker/Dockerfile.fuzz | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docker/Dockerfile.fuzz b/docker/Dockerfile.fuzz index 4d8ae7265f2..9e5dbb04233 100644 --- a/docker/Dockerfile.fuzz +++ b/docker/Dockerfile.fuzz @@ -2,9 +2,11 @@ # # Build: # $ docker build -f docker/Dockerfile.fuzz -t ddtrace-py-stackv2-fuzz . -# Run: +# Run: # $ docker run --rm -it -v "$PWD/.fuzz:/fuzz" ddtrace-py-stackv2-fuzz +# TODO(taegyunkim): Add this image to Datadog/images, and update +# .gitlab/fuzz.yml to use the same images. FROM debian:trixie-slim ENV DEBIAN_FRONTEND=noninteractive From 4523978b3bdffbe01860bb6b86c35a8654bba79a Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Tue, 20 Jan 2026 14:05:31 -0500 Subject: [PATCH 12/35] add a docs section on fuzzing --- docs/contributing-fuzzing.rst | 331 ++++++++++++++++++++++++++++++++++ docs/contributing.rst | 3 + docs/spelling_wordlist.txt | 9 + 3 files changed, 343 insertions(+) create mode 100644 docs/contributing-fuzzing.rst diff --git a/docs/contributing-fuzzing.rst b/docs/contributing-fuzzing.rst new file mode 100644 index 00000000000..330a54a92ef --- /dev/null +++ b/docs/contributing-fuzzing.rst @@ -0,0 +1,331 @@ +.. 
_fuzzing_guidelines: + +Fuzzing Native Code +=================== + +This document describes how to add fuzzing harnesses for native C/C++ code in dd-trace-py. + +What is Fuzzing? +---------------- + +Fuzzing is an automated testing technique that feeds random or mutated inputs to code to discover +bugs, crashes, and security vulnerabilities. For native C/C++ code, fuzzing can detect: + +* Buffer overflows and memory corruption +* Use-after-free bugs +* Integer overflows +* Null pointer dereferences +* Undefined behavior + +dd-trace-py uses **libFuzzer** with **AddressSanitizer (ASAN)** and **UndefinedBehaviorSanitizer (UBSAN)** +to continuously test native code components. + +Fuzzing Infrastructure Overview +-------------------------------- + +The repository has a "0 click onboarding" fuzzing infrastructure that automatically discovers, +builds, uploads, and runs fuzzing harnesses. + +**How it works:** + +1. **Discovery**: CI recursively searches for ``**/fuzz/build.sh`` files anywhere in the repository +2. **Build**: Each discovered ``build.sh`` script is executed to compile fuzzing binaries +3. **Registration**: Built binaries are uploaded to Datadog's internal fuzzing platform +4. **Continuous Fuzzing**: Binaries run continuously with crash reporting to Slack (``#fuzzing-ops``) + +**Architecture**:: + + Repository + └── / + └── fuzz/ + ├── build.sh # Build script (auto-discovered) + ├── fuzz_*.cpp # Fuzzing harness + └── CMakeLists.txt # Build configuration + + CI Pipeline (.gitlab/fuzz.yml): + 1. Discover: glob.glob("**/fuzz/build.sh") + 2. Build: Execute each build.sh + 3. Collect: Read /tmp/fuzz/build/fuzz_binaries.txt + 4. Upload: POST binaries to fuzzing API + 5. Register: Create continuous fuzzer + 6. Report: Crashes sent to Slack + +Adding a New Fuzzing Harness +----------------------------- + +1. Create Directory Structure +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Create a ``fuzz/`` subdirectory in your component: + +.. 
code-block:: bash + + $ mkdir -p path/to/your/component/fuzz/ + $ cd path/to/your/component/fuzz/ + +2. Write Your Fuzzing Harness +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Create a C/C++ file implementing the libFuzzer interface: + +.. code-block:: cpp + + // fuzz_your_component.cpp + #include + #include + #include "your_component.h" // Your code to test + + extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { + if (size == 0) { + return 0; + } + + // Call your code with fuzzer-generated input + your_function_to_test(data, size); + + return 0; // Continue fuzzing + } + +**Key points:** + +* Implement ``LLVMFuzzerTestOneInput(const uint8_t* data, size_t size)`` +* Return 0 to continue fuzzing +* Keep the harness simple - let sanitizers catch bugs +* Handle empty inputs gracefully + +3. Create CMakeLists.txt +~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: cmake + + cmake_minimum_required(VERSION 3.19) + + add_executable(fuzz_your_component + fuzz_your_component.cpp + ../src/your_source.c + ) + + target_include_directories(fuzz_your_component PRIVATE ../include) + + if(STACK_USE_LIBFUZZER) + target_compile_options(fuzz_your_component PRIVATE + -fsanitize=fuzzer,address,undefined + -fno-omit-frame-pointer + ) + target_link_options(fuzz_your_component PRIVATE + -fsanitize=fuzzer,address,undefined + ) + endif() + +4. Create build.sh Script +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Create an executable ``build.sh``: + +.. code-block:: bash + + #!/bin/bash + set -e + + TARGET=fuzz_your_component + BUILD_DIR=/tmp/fuzz/build/your_component # Use unique subdirectory + MANIFEST_FILE=/tmp/fuzz/build/fuzz_binaries.txt + + SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + SOURCE_DIR="$(cd "${SCRIPT_DIR}/.." 
&& pwd)" + + cmake -S "${SCRIPT_DIR}" -B "${BUILD_DIR}" \ + -DSTACK_USE_LIBFUZZER=ON \ + -DCMAKE_C_COMPILER=clang \ + -DCMAKE_CXX_COMPILER=clang++ \ + -DCMAKE_BUILD_TYPE=RelWithDebInfo \ + -DCMAKE_C_FLAGS="-O1 -g -fsanitize=address,undefined" \ + -DCMAKE_CXX_FLAGS="-O1 -g -fsanitize=address,undefined" \ + && cmake --build "${BUILD_DIR}" -j --target $TARGET + + # Register binary in manifest (REQUIRED) + BINARY_PATH="${BUILD_DIR}/${TARGET}" + if [ -x "${BINARY_PATH}" ]; then + echo "${BINARY_PATH}" >> "${MANIFEST_FILE}" + echo "✅ Registered binary: ${BINARY_PATH}" + else + echo "❌ Binary not found: ${BINARY_PATH}" + exit 1 + fi + +**Make executable:** ``chmod +x build.sh`` + +**Critical requirements:** + +* Script must be named exactly ``build.sh`` +* Must append binary path to ``/tmp/fuzz/build/fuzz_binaries.txt`` +* Use a unique ``BUILD_DIR`` subdirectory +* Exit with non-zero status if build fails + +5. Test Locally +~~~~~~~~~~~~~~~~ + +**Using Docker** (recommended): + +.. code-block:: bash + + $ docker build -f docker/Dockerfile.fuzz -t ddtrace-py-fuzz . + $ docker run --rm -it ddtrace-py-fuzz + +**Local build:** + +.. code-block:: bash + + $ cd path/to/your/component/fuzz/ + $ ./build.sh + $ /tmp/fuzz/build/your_component/fuzz_your_component -max_total_time=60 + +6. Commit and Push +~~~~~~~~~~~~~~~~~~ + +.. code-block:: bash + + $ git add path/to/your/component/fuzz/ + $ git commit -m "feat: add fuzzing for your component" + $ git push + +7. Trigger Fuzzing Job in CI +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The fuzzing job runs automatically on the ``main`` branch but must be triggered manually on pull requests. + +**In your GitLab CI pipeline:** + +1. Go to your merge request's **Pipelines** tab +2. Find the ``fuzz_infra`` job (it will show as "manual" or have a play button) +3. Click the play button (▶) to trigger the job + +This builds your fuzzer, uploads it to the fuzzing platform, and verifies it works correctly. 
+ +**After merging to main**, the fuzzer runs automatically on every commit and continuously in the background. + +Example: Existing Fuzzer +------------------------- + +See the profiling stack sampler fuzzer for a complete example: + +.. code-block:: text + + ddtrace/internal/datadog/profiling/stack/fuzz/ + ├── build.sh + ├── fuzz_echion_remote_read.cpp + └── CMakeLists.txt + +This fuzzer tests echion's ability to parse Python stack frames from remote processes. + +Advanced: Testing Remote Process Memory Reads +---------------------------------------------- + +For code that reads memory from remote processes (like echion), use conditional compilation +to replace the real memory read function with a mock: + +.. code-block:: cpp + + // In your header file (e.g., vm.h) + #if defined(YOUR_COMPONENT_FUZZING) + extern "C" int your_fuzz_copy_memory(pid_t pid, void* addr, + size_t len, void* buf); + #define copy_memory your_fuzz_copy_memory + #else + int copy_memory(pid_t pid, void* addr, size_t len, void* buf); + #endif + + // In your fuzzer harness + static thread_local const uint8_t* g_data = nullptr; + static thread_local size_t g_size = 0; + + extern "C" int your_fuzz_copy_memory(pid_t pid, void* addr, + size_t len, void* buf) { + // Serve fuzzer input bytes as "fake remote memory" + // ... bounds checking ... + memcpy(buf, g_data + offset, len); + return 0; + } + +See ``ddtrace/internal/datadog/profiling/stack/fuzz/fuzz_echion_remote_read.cpp`` for a complete example. 
+ +Common Build Options +-------------------- + +**Compiler flags:** + +``-O1`` + Light optimization for reasonable performance while preserving debuggability + +``-g`` + Include debug symbols for better crash reports + +``-fno-omit-frame-pointer`` + Required for accurate ASAN stack traces + +``-fsanitize=fuzzer`` + Enable libFuzzer instrumentation + +``-fsanitize=address`` + Enable AddressSanitizer for memory error detection + +``-fsanitize=undefined`` + Enable UndefinedBehaviorSanitizer + +**libFuzzer runtime options:** + +.. code-block:: bash + + $ ./fuzzer corpus/ -max_total_time=60 -max_len=4096 -jobs=4 + +``-max_total_time=N`` + Run for N seconds then exit + +``-max_len=N`` + Limit input size to N bytes + +``-jobs=N`` + Run N parallel fuzzing jobs + +``-artifact_prefix=path/`` + Store crash artifacts in this directory + +Current Limitations +------------------- + +**Single Python Version** + Fuzzing currently only runs on Python 3.12.3, despite dd-trace-py supporting Python 3.9-3.14. + Bugs in version-specific code paths may not be discovered. + +Resources and References +------------------------ + +**libFuzzer documentation:** + https://llvm.org/docs/LibFuzzer.html + +**AddressSanitizer:** + https://clang.llvm.org/docs/AddressSanitizer.html + +**UndefinedBehaviorSanitizer:** + https://clang.llvm.org/docs/UndefinedBehaviorSanitizer.html + +**Fuzzing best practices:** + https://github.com/google/fuzzing/blob/master/docs/good-fuzz-target.md + +**Example fuzzer in this repository:** + ``ddtrace/internal/datadog/profiling/stack/fuzz/fuzz_echion_remote_read.cpp`` + +**Crash reports:** + Check ``#fuzzing-ops`` Slack channel + +Quick Start Checklist +--------------------- + +1. ☐ Create ``fuzz/`` directory in your component +2. ☐ Write ``fuzz_*.cpp`` implementing ``LLVMFuzzerTestOneInput()`` +3. ☐ Create ``CMakeLists.txt`` with fuzzer build configuration +4. ☐ Create executable ``build.sh`` that builds and registers binary +5. 
☐ Test locally with Docker or manual build +6. ☐ Commit and push +7. ☐ Manually trigger ``fuzz_infra`` job in GitLab CI (on pull requests) +8. ☐ Monitor ``#fuzzing-ops`` for crash reports diff --git a/docs/contributing.rst b/docs/contributing.rst index 91585eb953a..6618b914edf 100644 --- a/docs/contributing.rst +++ b/docs/contributing.rst @@ -16,6 +16,8 @@ If you're trying to set up a local development environment, read `this `_. +`Fuzzing native code documentation for contributors `_. + Thanks for working with us! .. _change_process: @@ -161,6 +163,7 @@ about Instrumentation Telemetry. contributing-design contributing-integrations contributing-testing + contributing-fuzzing contributing-tracing contributing-release releasenotes diff --git a/docs/spelling_wordlist.txt b/docs/spelling_wordlist.txt index 13c8384e5ad..6f7ba029f0f 100644 --- a/docs/spelling_wordlist.txt +++ b/docs/spelling_wordlist.txt @@ -20,6 +20,7 @@ appsec AppSec aredis args +ASAN ascii asgi asm @@ -120,6 +121,9 @@ flamegraph fnmatch formatter freezegun +fuzzer +fuzzers +fuzzing gdb genai generativeai @@ -135,6 +139,8 @@ grpc gRPC gunicorn Gunicorn +harness +harnesses hostname hostnames hotspot @@ -168,8 +174,10 @@ kwarg kwargs langchain langchain_community +libclang libdatadog libddwaf +libFuzzer lifecycle linters litellm @@ -313,6 +321,7 @@ tokenizer tracecontext tracestate tweens +UBSAN # docs/configuration.rst uest unbuffered From d351f67031738a2392330ccf8dee82eccc9c4730 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Tue, 20 Jan 2026 14:10:40 -0500 Subject: [PATCH 13/35] Set owners for fuzzing related files --- .github/CODEOWNERS | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index d133481191c..8a054562e4e 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -285,3 +285,8 @@ tests/contrib/**/test_*dsm.py @DataDog/data-streams- tests/**/*appsec* @DataDog/asm-python tests/**/*iast* @DataDog/asm-python tests/tracer/test_propagation.py 
@DataDog/apm-sdk-capabilities-python @DataDog/asm-python + +# Fuzzing +.gitlab/fuzz.yml @DataDog/chaos-platform @DataDog/profiling-python +.gitlab/scripts/fuzz_infra.py @DataDog/chaos-platform @DataDog/profiling-python +docker/Dockerfile.fuzz @DataDog/chaos-platform @DataDog/profiling-python From 92ba050dceb14736e9ce6e4f56422c3e1c52d1f0 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Tue, 20 Jan 2026 14:12:05 -0500 Subject: [PATCH 14/35] Add datadog internal docs link --- docs/contributing-fuzzing.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/contributing-fuzzing.rst b/docs/contributing-fuzzing.rst index 330a54a92ef..10a864fbffe 100644 --- a/docs/contributing-fuzzing.rst +++ b/docs/contributing-fuzzing.rst @@ -300,6 +300,9 @@ Current Limitations Resources and References ------------------------ +**Datadog Fuzzing Platform (internal):** + https://datadoghq.atlassian.net/wiki/spaces/RESENG/pages/2147976713/Fuzzing + **libFuzzer documentation:** https://llvm.org/docs/LibFuzzer.html From 2321f3f54ef9a9afe2a6d12740ab7cbaaad75514 Mon Sep 17 00:00:00 2001 From: Edouard Schweisguth Date: Wed, 21 Jan 2026 10:19:44 +0100 Subject: [PATCH 15/35] Add spelling wordlist --- docs/spelling_wordlist.txt | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/docs/spelling_wordlist.txt b/docs/spelling_wordlist.txt index 6f7ba029f0f..64bff0926f2 100644 --- a/docs/spelling_wordlist.txt +++ b/docs/spelling_wordlist.txt @@ -4,8 +4,8 @@ adk agentless aiobotocore aiohttp -aiomysql aiokafka +aiomysql aiopg aioredis algolia @@ -20,6 +20,7 @@ appsec AppSec aredis args +ARN ASAN ascii asgi @@ -39,7 +40,6 @@ avro Avro aws AWS -ARN backend backends backoff @@ -69,13 +69,13 @@ composable config contextvar contextvars +contrib coroutine coroutines CPU CPython CUPTI Cython -contrib datadog datadoghq dataset @@ -84,8 +84,10 @@ datastores dbapi ddtrace deallocating +debuggability decompiling deprecations +dereferences DES deserializing django @@ -207,6 +209,7 @@ 
mysql MySQL mysqlclient mysqldb +onboarding # tests/contrib/openai/test_openai_v1.py Nam namespace From f3722826dc79c689aa6ea6cec4e0e861dd4fdd13 Mon Sep 17 00:00:00 2001 From: Edouard Schweisguth Date: Wed, 21 Jan 2026 12:04:43 +0100 Subject: [PATCH 16/35] empty commit From d8b12d9f18e09b4d410646f4258e197e5cd82a72 Mon Sep 17 00:00:00 2001 From: Edouard Schweisguth Date: Mon, 15 Dec 2025 14:43:26 +0100 Subject: [PATCH 17/35] WIP Fuzzer --- .gitignore | 3 + .../datadog/profiling/stack/CMakeLists.txt | 15 ++ .../profiling/stack/echion/echion/vm.h | 13 ++ .../profiling/stack_v2/fuzz/CMakeLists.txt | 39 ++++ .../stack_v2/fuzz/fuzz_echion_remote_read.cpp | 188 ++++++++++++++++++ docker/Dockerfile.fuzz | 43 ++++ 6 files changed, 301 insertions(+) create mode 100644 ddtrace/internal/datadog/profiling/stack_v2/fuzz/CMakeLists.txt create mode 100644 ddtrace/internal/datadog/profiling/stack_v2/fuzz/fuzz_echion_remote_read.cpp create mode 100644 docker/Dockerfile.fuzz diff --git a/.gitignore b/.gitignore index f818c695c2c..6c7145aef0b 100644 --- a/.gitignore +++ b/.gitignore @@ -209,3 +209,6 @@ tests/appsec/iast/fixtures/taint_sinks/not_exists.txt # Rust build artifacts src/native/target* + +# Fuzzing corpus, output and artifacts +.fuzz/ \ No newline at end of file diff --git a/ddtrace/internal/datadog/profiling/stack/CMakeLists.txt b/ddtrace/internal/datadog/profiling/stack/CMakeLists.txt index 918092580b1..1158ae7db40 100644 --- a/ddtrace/internal/datadog/profiling/stack/CMakeLists.txt +++ b/ddtrace/internal/datadog/profiling/stack/CMakeLists.txt @@ -151,3 +151,18 @@ if(BUILD_TESTING) enable_testing() add_subdirectory(test) endif() + +# Fuzzing harnesses (off by default) +if(NOT DEFINED BUILD_FUZZING) + set(BUILD_FUZZING + OFF + CACHE BOOL "Build fuzzing harnesses for stack_v2/echion") +else() + set(BUILD_FUZZING + ON + CACHE BOOL "Build fuzzing harnesses for stack_v2/echion") +endif() + +if(BUILD_FUZZING) + add_subdirectory(fuzz) +endif() diff --git 
a/ddtrace/internal/datadog/profiling/stack/echion/echion/vm.h b/ddtrace/internal/datadog/profiling/stack/echion/echion/vm.h index cbe884294d7..5fc07fda976 100644 --- a/ddtrace/internal/datadog/profiling/stack/echion/echion/vm.h +++ b/ddtrace/internal/datadog/profiling/stack/echion/echion/vm.h @@ -162,8 +162,21 @@ init_safe_copy() * * @return zero on success, otherwise non-zero. */ +#if defined(ECHION_FUZZING) +// Let the fuzzing harness control the copy_memory behavior, so we can simulate "garbage" reads. +extern "C" int +echion_fuzz_copy_memory(proc_ref_t proc_ref, const void* addr, ssize_t len, void* buf); + +int +copy_memory(proc_ref_t proc_ref, const void* addr, ssize_t len, void* buf) +{ + return echion_fuzz_copy_memory(proc_ref, addr, len, buf); +} +#else +// Implementation in vm.cc int copy_memory(proc_ref_t proc_ref, const void* addr, ssize_t len, void* buf); +#endif inline pid_t pid = 0; diff --git a/ddtrace/internal/datadog/profiling/stack_v2/fuzz/CMakeLists.txt b/ddtrace/internal/datadog/profiling/stack_v2/fuzz/CMakeLists.txt new file mode 100644 index 00000000000..fe747a8582a --- /dev/null +++ b/ddtrace/internal/datadog/profiling/stack_v2/fuzz/CMakeLists.txt @@ -0,0 +1,39 @@ +cmake_minimum_required(VERSION 3.19) + +# Fuzz targets are built only when stack_v2/CMakeLists.txt has BUILD_FUZZING=ON. +# The caller is expected to provide compiler/linker flags for libFuzzer +# (e.g. -fsanitize=fuzzer,address,undefined). 
+ +option(STACKV2_USE_LIBFUZZER "Link fuzz targets with libFuzzer (-fsanitize=fuzzer)" OFF) + +add_executable(fuzz_echion_remote_read fuzz_echion_remote_read.cpp ../src/echion/frame.cc ../src/echion/danger.cc) + +target_include_directories(fuzz_echion_remote_read PRIVATE ../include) +target_include_directories(fuzz_echion_remote_read SYSTEM PRIVATE ${Python3_INCLUDE_DIRS} ../echion ../include/vendored + ../include/util) + +# Ensure echion headers take the fuzz hook in vm.h +target_compile_definitions(fuzz_echion_remote_read PRIVATE ECHION_FUZZING) + +# When building with libFuzzer, add the fuzzer runtime only for this target. +if(STACKV2_USE_LIBFUZZER) + target_compile_definitions(fuzz_echion_remote_read PRIVATE FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) + target_compile_options(fuzz_echion_remote_read PRIVATE -fsanitize=fuzzer,address,undefined -fno-omit-frame-pointer) + target_link_options(fuzz_echion_remote_read PRIVATE -fsanitize=fuzzer,address,undefined) +endif() + +# Echion sources need to be given the current platform +if(APPLE) + target_compile_definitions(fuzz_echion_remote_read PRIVATE PL_DARWIN) +elseif(UNIX) + target_compile_definitions(fuzz_echion_remote_read PRIVATE PL_LINUX) +endif() + +# Use the same ddup config helper for sanitizer/rpath defaults. +add_ddup_config(fuzz_echion_remote_read) + +if(Python3_LIBRARIES) + target_link_libraries(fuzz_echion_remote_read PRIVATE ${Python3_LIBRARIES}) +endif() + + diff --git a/ddtrace/internal/datadog/profiling/stack_v2/fuzz/fuzz_echion_remote_read.cpp b/ddtrace/internal/datadog/profiling/stack_v2/fuzz/fuzz_echion_remote_read.cpp new file mode 100644 index 00000000000..eb60732f3e7 --- /dev/null +++ b/ddtrace/internal/datadog/profiling/stack_v2/fuzz/fuzz_echion_remote_read.cpp @@ -0,0 +1,188 @@ +// Fuzz harness (raw): treat libFuzzer input bytes as a remote memory image and +// let echion attempt to interpret *any* garbage. 
+// +// This is intentionally a "minimal structure" harness: we do not synthesize +// valid CPython object layouts. Instead, we pass fuzz-derived remote addresses +// into echion APIs and rely on echion's own size caps (e.g. MAX_MIRROR_SIZE, +// MAX_STRING_SIZE) to keep the harness stable. + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#if PY_VERSION_HEX >= 0x030b0000 +#include +#endif + +#if PY_VERSION_HEX >= 0x030a0000 +// Expose PyCodeObject fields for local stack allocation and field assignment. +// Echion itself uses internal headers; the fuzz harness does the same. +#ifndef Py_BUILD_CORE +#define Py_BUILD_CORE +#endif +#include +#endif + +namespace { + +static constexpr uintptr_t kRemoteBase = 0x10000000ULL; + +static thread_local const uint8_t* g_data = nullptr; +static thread_local size_t g_size = 0; + +static inline uintptr_t +addr_from_u64(uint64_t v) +{ + if (g_size == 0) { + return kRemoteBase; + } + return kRemoteBase + static_cast(v % g_size); +} + +static inline uint64_t +load_u64_le(const uint8_t* data, size_t size, size_t off) +{ + uint64_t v = 0; + if (off >= size) { + return 0; + } + const size_t n = std::min(8, size - off); + std::memcpy(&v, data + off, n); + return v; +} + +static inline int +load_int_le(const uint8_t* data, size_t size, size_t off) +{ + int v = 0; + if (off >= size) { + return 0; + } + const size_t n = std::min(4, size - off); + std::memcpy(&v, data + off, n); + return v; +} + +} // namespace + +extern "C" int +echion_fuzz_copy_memory(proc_ref_t proc_ref, const void* addr, ssize_t len, void* buf) +{ + (void)proc_ref; + + // Return 0 on success, non-zero on failure (matches copy_memory contract). + if (!g_data || !buf || len < 0) { + return -1; + } + + // Keep individual reads bounded to avoid pathological slow paths. 
+ static constexpr size_t kMaxCopy = 2U << 20; // 2 MiB + if (static_cast(len) > kMaxCopy) { + return -1; + } + + uintptr_t a = reinterpret_cast(addr); + if (a >= kRemoteBase) { + size_t off = static_cast(a - kRemoteBase); + if (off + static_cast(len) <= g_size) { + std::memcpy(buf, g_data + off, static_cast(len)); + return 0; + } + } + + return -1; +} + +extern "C" int +LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) +{ + g_data = data; + g_size = size; + + if (size == 0) { + return 0; + } + + // Pick fuzz generated values. + // pointers: "remote address" inside the input data + uintptr_t p0 = addr_from_u64(load_u64_le(data, size, 0)); + uintptr_t p1 = addr_from_u64(load_u64_le(data, size, 8)); + uintptr_t p2 = addr_from_u64(load_u64_le(data, size, 16)); + + // lasti: last instruction index, used by Frame::create() + int lasti = load_int_le(data, size, 24); + +#if PY_VERSION_HEX >= 0x030b0000 + { + StackChunk sc; + (void)sc.update(reinterpret_cast<_PyStackChunk*>(p0)); + (void)sc.resolve(reinterpret_cast(p1)); + } +#endif + + { + // Create a *local* PyCodeObject with pointers to arbitrary remote garbage. + // Frame::create() will attempt to read those remote objects via copy_type/copy_generic. 
+ PyCodeObject code{}; + code.co_firstlineno = 1; + code.co_filename = reinterpret_cast(p0); + +#if PY_VERSION_HEX >= 0x030b0000 + code.co_qualname = reinterpret_cast(p1); + code.co_linetable = reinterpret_cast(p2); +#elif PY_VERSION_HEX >= 0x030a0000 + code.co_name = reinterpret_cast(p1); + code.co_linetable = reinterpret_cast(p2); +#else + code.co_name = reinterpret_cast(p1); + code.co_lnotab = reinterpret_cast(p2); +#endif + + // CORE of the fuzz harness + (void)Frame::create(&code, lasti); + // TODO: Call more internal functions to trigger more code paths + // Possible ideas: + // - MirrorSet::* + // - StackChunk::* + // - ThreadInfo::* + } + + g_data = nullptr; + g_size = 0; + return 0; +} + +#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION +// Standalone entrypoint for quick sanity runs without linking libFuzzer. +// When building with libFuzzer, the fuzzer runtime provides `main()`. +#include +#include + +int +main(int argc, char** argv) +{ + if (argc != 2) { + std::cerr << "Usage: " << argv[0] << " \n"; + return 2; + } + + std::ifstream f(argv[1], std::ios::binary); + if (!f) { + std::cerr << "Failed to open input file\n"; + return 2; + } + + std::vector data((std::istreambuf_iterator(f)), std::istreambuf_iterator()); + (void)LLVMFuzzerTestOneInput(data.data(), data.size()); + return 0; +} +#endif + + diff --git a/docker/Dockerfile.fuzz b/docker/Dockerfile.fuzz new file mode 100644 index 00000000000..00f29465b9f --- /dev/null +++ b/docker/Dockerfile.fuzz @@ -0,0 +1,43 @@ +# Minimal fuzzing image for the stack_v2/echion harness (libFuzzer + ASAN/UBSAN). +# +# Build: +# $ docker build -f docker/Dockerfile.fuzz -t ddtrace-py-stackv2-fuzz . 
+# Run: +# $ docker run --rm -it -v "$PWD/.fuzz:/fuzz" ddtrace-py-stackv2-fuzz + +FROM debian:trixie-slim + +ENV DEBIAN_FRONTEND=noninteractive + +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + ca-certificates \ + clang \ + cmake \ + git \ + libclang-rt-dev \ + lld \ + make \ + ninja-build \ + python3 \ + python3-dev \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /src +COPY . /src + +# Build only the fuzz target; skip the Python extension (avoids libdd_wrapper dependency). +RUN cmake -S ddtrace/internal/datadog/profiling/stack_v2 -B /build \ + -DBUILD_FUZZING=ON -DBUILD_TESTING=OFF -DSTACKV2_USE_LIBFUZZER=ON \ + -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ \ + -DCMAKE_BUILD_TYPE=RelWithDebInfo \ + -DCMAKE_C_FLAGS="-O1 -g -fno-omit-frame-pointer -fsanitize=address,undefined" \ + -DCMAKE_CXX_FLAGS="-O1 -g -fno-omit-frame-pointer -fsanitize=address,undefined" \ + -DCMAKE_EXE_LINKER_FLAGS="-fsanitize=address,undefined" \ + && cmake --build /build -j --target fuzz_echion_remote_read + +# RUN mkdir -p /fuzz/corpus /fuzz/out + +CMD ["/build/fuzz/fuzz_echion_remote_read", "/fuzz/", "-artifact_prefix=/fuzz/"] + + From d23e8b2865b2927c96c5671bef9231899fa075c2 Mon Sep 17 00:00:00 2001 From: Edouard Schweisguth Date: Wed, 17 Dec 2025 17:24:29 +0100 Subject: [PATCH 18/35] onboard to internal fuzzing infra --- .gitlab-ci.yml | 2 + .gitlab/fuzz.yml | 29 +++ .gitlab/scripts/fuzz_infra.py | 243 ++++++++++++++++++ .../{stack_v2 => stack}/fuzz/CMakeLists.txt | 6 +- .../datadog/profiling/stack/fuzz/build.sh | 32 +++ .../fuzz/fuzz_echion_remote_read.cpp | 6 +- docker/Dockerfile.fuzz | 20 +- 7 files changed, 320 insertions(+), 18 deletions(-) create mode 100644 .gitlab/fuzz.yml create mode 100755 .gitlab/scripts/fuzz_infra.py rename ddtrace/internal/datadog/profiling/{stack_v2 => stack}/fuzz/CMakeLists.txt (88%) create mode 100755 ddtrace/internal/datadog/profiling/stack/fuzz/build.sh rename ddtrace/internal/datadog/profiling/{stack_v2 => 
stack}/fuzz/fuzz_echion_remote_read.cpp (99%) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index c399733c7eb..b19f5fccb66 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,4 +1,5 @@ stages: + - fuzz - package - tests - shared-pipeline @@ -51,6 +52,7 @@ include: - local: ".gitlab/multi-os-tests.yml" - local: ".gitlab/benchmarks/serverless.yml" - local: ".gitlab/native.yml" + - local: ".gitlab/fuzz.yml" tests-gen: stage: tests diff --git a/.gitlab/fuzz.yml b/.gitlab/fuzz.yml new file mode 100644 index 00000000000..7f22bf4c7b0 --- /dev/null +++ b/.gitlab/fuzz.yml @@ -0,0 +1,29 @@ +variables: + REPO_LANG: python # "python" is used everywhere rather than "py" + # CI_DEBUG_SERVICES: "true" + +fuzz_infra: + image: + name: registry.ddbuild.io/images/mirror/ubuntu:24.04 + tags: ["arch:amd64"] + stage: fuzz + timeout: 30m + allow_failure: true + rules: + # runs on gitlab schedule and on merge to main. + # Also allow manual run in branches for ease of debug / testing + - if: '$CI_COMMIT_BRANCH == "main" && $CI_PIPELINE_SOURCE == "schedule"' + allow_failure: true + - if: $CI_COMMIT_BRANCH == "main" + allow_failure: true + - when: manual + allow_failure: true + before_script: + # Install build dependencies (same as docker/Dockerfile.fuzz) + - apt-get update && apt-get install -y --no-install-recommends ca-certificates clang cmake git libclang-rt-dev lld make ninja-build python3 python3-dev python3-pip curl unzip + - python3 -m pip install requests --break-system-packages + # Install vault for fuzzing API authentication + - VAULT_VERSION=1.21.1 && curl -fsSL "https://releases.hashicorp.com/vault/${VAULT_VERSION}/vault_${VAULT_VERSION}_linux_amd64.zip" -o vault.zip && unzip vault.zip && mv vault /usr/local/bin/vault && rm vault.zip && chmod +x /usr/local/bin/vault + - git config --global --add safe.directory ${CI_PROJECT_DIR} + script: + - python3 .gitlab/scripts/fuzz_infra.py diff --git a/.gitlab/scripts/fuzz_infra.py b/.gitlab/scripts/fuzz_infra.py new file mode 100755 
index 00000000000..85fc605babe
--- /dev/null
+++ b/.gitlab/scripts/fuzz_infra.py
@@ -0,0 +1,270 @@
+#!/usr/bin/env python3
+
+# This script enables "0 click onboarding" for new fuzzer in the dd-trace-py repository.
+# This means that any new fuzzer should be automatically detected and run in the internal infrastructure with enrichments, reporting, triaging, auto fix etc..
+# Reports are submitted via Slack, with the channel defined by SLACK_CHANNEL
+#
+# Requirements:
+#
+# This scripts assumes that:
+# - Each fuzz target is built in a separate build directory named `fuzz` and having a `build.sh` script that builds the target.
+# - The build script appends the path to the built binary to a "MANIFEST_FILE", allowing the discovery of each fuzz target by the script.
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+import glob
+import os
+import subprocess
+import sys
+from typing import List
+
+import requests
+
+# TODO: replace me to dd-trace-py ops' slack channel once initial onboarding is done
+SLACK_CHANNEL = "fuzzing-ops"
+TEAM_NAME = "profiling-python"
+REPOSITORY_URL = "https://github.com/DataDog/dd-trace-py"
+PROJECT_NAME = "dd-trace-py"
+# We currently only support libfuzzer for this repository.
+FUZZ_TYPE = "libfuzzer"
+API_URL = "https://fuzzing-api.us1.ddbuild.io/api/v1"
+
+# Paths and constants for script execution
+REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
+FUZZER_BINARY_BASE_PATH = "/tmp/fuzz/build"
+MANIFEST_FILE = os.path.join(FUZZER_BINARY_BASE_PATH, "fuzz_binaries.txt")
+MAX_PKG_NAME_LENGTH = 50
+VAULT_PATH = "vault"
+
+
+@dataclass(frozen=True)
+class FuzzBinary:
+    """Represents a built fuzz binary ready for upload."""
+
+    pkgname: str
+    binary_name: str
+    binary_path: str
+
+
+def build_and_upload_fuzz(
+    team: str = TEAM_NAME,
+    slack_channel: str = SLACK_CHANNEL,
+    repository_url: str = REPOSITORY_URL,
+) -> None:
+    """
+    Build every discovered fuzz harness, then upload and register each built binary.
+
+    Raises RuntimeError when no build script or binary is found, or when any binary fails to upload or
+    register, so that callers never report success for a run that onboarded nothing.
+    """
+    # check=True: an empty SHA would otherwise end up silently in the API URLs.
+    git_sha = subprocess.run(
+        ["git", "rev-parse", "HEAD"], cwd=REPO_ROOT, check=True, stdout=subprocess.PIPE, text=True
+    ).stdout.strip()
+
+    # Step 1: Discover and run all build scripts
+    build_scripts = discover_build_scripts(REPO_ROOT)
+    if not build_scripts:
+        raise RuntimeError(f"No fuzz build scripts found under {REPO_ROOT}")
+
+    # Clear any previous manifest file
+    if os.path.exists(MANIFEST_FILE):
+        os.remove(MANIFEST_FILE)
+
+    for build_script in build_scripts:
+        run_build_script(build_script)
+
+    # Step 2: Read the manifest file to discover built binaries
+    binaries = read_manifest(MANIFEST_FILE)
+    if not binaries:
+        raise RuntimeError(f"No fuzz binaries found in manifest {MANIFEST_FILE}")
+
+    # Step 3: Upload and create a fuzzer for each binary. Keep going after a failure so that one broken
+    # target does not prevent the others from being onboarded, then report every failure at once.
+    failed: List[str] = []
+    for binary in binaries:
+        uploaded = upload_binary(binary, git_sha)
+        registered = create_fuzzer(binary, git_sha, team, slack_channel, repository_url)
+        if not (uploaded and registered):
+            failed.append(binary.binary_name)
+
+    if failed:
+        raise RuntimeError(f"Failed to upload or register: {', '.join(failed)}")
+
+
+def get_package_name(binary_name: str) -> str:
+    """
+    Generate a package name for the fuzzing platform from a binary name.
+    It's prefixed with the repository name so it's easier to filter.
+    The package name is limited by k8s labels format: must be < 63 chars, alphanumeric and hyphen.
+    """
+    return PROJECT_NAME + "-" + binary_name[:MAX_PKG_NAME_LENGTH].replace("_", "-")
+
+
+def _is_executable(file_path: str) -> bool:
+    return os.path.isfile(file_path) and os.access(file_path, os.X_OK)
+
+
+def discover_build_scripts(repo_root: str) -> List[str]:
+    """
+    Discover fuzz build scripts by looking for '**/fuzz/build.sh'
+
+    This allows for "0 click onboarding" for new fuzz harnesses.
+    """
+    build_scripts: List[str] = []
+    for build_script in glob.glob(os.path.join(repo_root, "**/fuzz/build.sh"), recursive=True):
+        print(f"Found build script: {build_script}")
+        build_scripts.append(build_script)
+    return build_scripts
+
+
+def run_build_script(build_script: str) -> None:
+    """Run a fuzz build script."""
+    fuzz_dir = os.path.dirname(build_script)
+    print(f"Building fuzz directory: {fuzz_dir}")
+
+    if not os.path.isfile(build_script):
+        raise FileNotFoundError(build_script)
+
+    try:
+        result = subprocess.run(
+            [build_script],
+            cwd=fuzz_dir,
+            check=True,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            text=True,
+        )
+        print(result.stdout)
+        if result.stderr:
+            print(result.stderr)
+    except subprocess.CalledProcessError as e:
+        print(f"❌ Build script failed with exit code {e.returncode}")
+        print(f"Command: {e.cmd}")
+        if e.stdout:
+            print(f"stdout:\n{e.stdout}")
+        if e.stderr:
+            print(f"stderr:\n{e.stderr}")
+        raise
+
+    print(f"✅ Built fuzzers from {build_script}")
+
+
+def read_manifest(manifest_path: str) -> List[FuzzBinary]:
+    """
+    Read the manifest file created by build scripts to discover built binaries.
+
+    Each build script appends its binary path(s) to this file.
+    """
+    binaries: List[FuzzBinary] = []
+
+    if not os.path.isfile(manifest_path):
+        print(f"⚠️ No manifest file found at {manifest_path}")
+        return binaries
+
+    with open(manifest_path) as f:
+        for line in f:
+            binary_path = line.strip()
+            if not binary_path:
+                continue
+            if not os.path.isfile(binary_path):
+                print(f"⚠️ Binary listed in manifest not found: {binary_path}")
+                continue
+            if not _is_executable(binary_path):
+                print(f"⚠️ Binary listed in manifest is not executable: {binary_path}")
+                continue
+
+            binary_name = os.path.basename(binary_path)
+            print(f"Found fuzz binary: {binary_path}")
+            binaries.append(
+                FuzzBinary(
+                    pkgname=get_package_name(binary_name),
+                    binary_name=binary_name,
+                    binary_path=binary_path,
+                )
+            )
+
+    return binaries
+
+
+def create_fuzzer(binary: FuzzBinary, git_sha: str, team: str, slack_channel: str, repository_url: str) -> bool:
+    """
+    Register a fuzzer with the fuzzing platform.
+
+    Returns True when the fuzzer was registered, False when the request failed.
+    """
+    print(f"Starting fuzzer for {binary.pkgname} ({binary.binary_name})...")
+    run_payload = {
+        "app": binary.pkgname,
+        "debug": False,
+        "version": git_sha,
+        "type": FUZZ_TYPE,
+        "binary": binary.binary_name,
+        "team": team,
+        "slack_channel": slack_channel,
+        "repository_url": repository_url,
+    }
+    try:
+        response = requests.post(
+            f"{API_URL}/apps/{binary.pkgname}/fuzzers", headers=get_headers(), json=run_payload, timeout=30
+        )
+        response.raise_for_status()
+        print(f"✅ Started fuzzer for {binary.pkgname} ({binary.binary_name})")
+        print(response.json())
+    except Exception as e:
+        print(f"❌ Failed to start fuzzer for {binary.pkgname} ({binary.binary_name}): {e}")
+        return False
+
+    return True
+
+
+def upload_binary(binary: FuzzBinary, git_sha: str) -> bool:
+    """
+    Upload a fuzz binary to the fuzzing platform.
+
+    Returns True when the binary was uploaded, False when any step failed.
+    """
+    try:
+        # Get presigned URL so we can use s3 uploading
+        print(f"Getting presigned URL for {binary.pkgname} ({binary.binary_name})...")
+        presigned_response = requests.post(
+            f"{API_URL}/apps/{binary.pkgname}/builds/{git_sha}/url", headers=get_headers(), timeout=30
+        )
+
+        presigned_response.raise_for_status()
+        presigned_url = presigned_response.json()["data"]["url"]
+
+        print(f"Uploading {binary.pkgname} ({binary.binary_name}) for {git_sha}...")
+        with open(binary.binary_path, "rb") as f:
+            upload_response = requests.put(presigned_url, data=f, timeout=300)
+        upload_response.raise_for_status()
+        print(f"✅ Uploaded {binary.binary_name}")
+    except Exception as e:
+        print(f"❌ Failed to upload binary for {binary.pkgname} ({binary.binary_name}): {e}")
+        return False
+    return True
+
+
+def get_headers():
+    """Build the fuzzing API request headers, authenticating with an OIDC token read from Vault."""
+    # check=True plus the emptiness test: a missing token would otherwise be sent as "Bearer ".
+    token = subprocess.run(
+        [VAULT_PATH, "read", "-field=token", "identity/oidc/token/security-fuzzing-platform"],
+        check=True,
+        stdout=subprocess.PIPE,
+        text=True,
+    ).stdout.strip()
+    if not token:
+        raise RuntimeError("Vault returned an empty token for the fuzzing platform")
+    return {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
+
+
+if __name__ == "__main__":
+    print("🚀 Starting fuzzing infrastructure setup...")
+    try:
+        build_and_upload_fuzz()
+        print("✅ Fuzzing infrastructure setup completed successfully!")
+    except Exception as e:
+        print(f"❌ Failed to set up fuzzing infrastructure: {e}")
+        sys.exit(1)
diff --git a/ddtrace/internal/datadog/profiling/stack_v2/fuzz/CMakeLists.txt b/ddtrace/internal/datadog/profiling/stack/fuzz/CMakeLists.txt
similarity index 88%
rename from ddtrace/internal/datadog/profiling/stack_v2/fuzz/CMakeLists.txt
rename to ddtrace/internal/datadog/profiling/stack/fuzz/CMakeLists.txt
index fe747a8582a..73f268bc3da 100644
--- a/ddtrace/internal/datadog/profiling/stack_v2/fuzz/CMakeLists.txt
+++ b/ddtrace/internal/datadog/profiling/stack/fuzz/CMakeLists.txt
@@ -1,10 +1,10 @@
 cmake_minimum_required(VERSION 3.19)
 
-# Fuzz targets are built only when stack_v2/CMakeLists.txt has BUILD_FUZZING=ON.
+# Fuzz targets are built only when stack/CMakeLists.txt has BUILD_FUZZING=ON.
 # The caller is expected to provide compiler/linker flags for libFuzzer
 # (e.g. -fsanitize=fuzzer,address,undefined).
-option(STACKV2_USE_LIBFUZZER "Link fuzz targets with libFuzzer (-fsanitize=fuzzer)" OFF) +option(STACK_USE_LIBFUZZER "Link fuzz targets with libFuzzer (-fsanitize=fuzzer)" OFF) add_executable(fuzz_echion_remote_read fuzz_echion_remote_read.cpp ../src/echion/frame.cc ../src/echion/danger.cc) @@ -16,7 +16,7 @@ target_include_directories(fuzz_echion_remote_read SYSTEM PRIVATE ${Python3_INCL target_compile_definitions(fuzz_echion_remote_read PRIVATE ECHION_FUZZING) # When building with libFuzzer, add the fuzzer runtime only for this target. -if(STACKV2_USE_LIBFUZZER) +if(STACK_USE_LIBFUZZER) target_compile_definitions(fuzz_echion_remote_read PRIVATE FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) target_compile_options(fuzz_echion_remote_read PRIVATE -fsanitize=fuzzer,address,undefined -fno-omit-frame-pointer) target_link_options(fuzz_echion_remote_read PRIVATE -fsanitize=fuzzer,address,undefined) diff --git a/ddtrace/internal/datadog/profiling/stack/fuzz/build.sh b/ddtrace/internal/datadog/profiling/stack/fuzz/build.sh new file mode 100755 index 00000000000..52f70bdd4b6 --- /dev/null +++ b/ddtrace/internal/datadog/profiling/stack/fuzz/build.sh @@ -0,0 +1,32 @@ +#!/bin/bash +set -e + +TARGET=fuzz_echion_remote_read +BUILD_DIR=/tmp/fuzz/build +MANIFEST_FILE="${BUILD_DIR}/fuzz_binaries.txt" + +# Get the directory where this script lives, then go up one level to the stack source +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +SOURCE_DIR="$(cd "${SCRIPT_DIR}/.." 
&& pwd)" + +echo "Building fuzz target: $TARGET" +echo "Source directory: $SOURCE_DIR" + +cmake -S "${SOURCE_DIR}" -B "${BUILD_DIR}" \ + -DBUILD_FUZZING=ON -DBUILD_TESTING=OFF -DSTACK_USE_LIBFUZZER=ON \ + -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ \ + -DCMAKE_BUILD_TYPE=RelWithDebInfo \ + -DCMAKE_C_FLAGS="-O1 -g -fno-omit-frame-pointer -fsanitize=address,undefined" \ + -DCMAKE_CXX_FLAGS="-O1 -g -fno-omit-frame-pointer -fsanitize=address,undefined" \ + -DCMAKE_EXE_LINKER_FLAGS="-fsanitize=address,undefined" \ + && cmake --build "${BUILD_DIR}" -j --target $TARGET + +# Register the built binary in the manifest file for the CI infrastructure to discover +BINARY_PATH="${BUILD_DIR}/fuzz/${TARGET}" +if [ -x "${BINARY_PATH}" ]; then + echo "${BINARY_PATH}" >> "${MANIFEST_FILE}" + echo "✅ Registered binary in manifest: ${BINARY_PATH}" +else + echo "❌ Binary not found or not executable: ${BINARY_PATH}" + exit 1 +fi \ No newline at end of file diff --git a/ddtrace/internal/datadog/profiling/stack_v2/fuzz/fuzz_echion_remote_read.cpp b/ddtrace/internal/datadog/profiling/stack/fuzz/fuzz_echion_remote_read.cpp similarity index 99% rename from ddtrace/internal/datadog/profiling/stack_v2/fuzz/fuzz_echion_remote_read.cpp rename to ddtrace/internal/datadog/profiling/stack/fuzz/fuzz_echion_remote_read.cpp index eb60732f3e7..6a0e69d5a50 100644 --- a/ddtrace/internal/datadog/profiling/stack_v2/fuzz/fuzz_echion_remote_read.cpp +++ b/ddtrace/internal/datadog/profiling/stack/fuzz/fuzz_echion_remote_read.cpp @@ -147,8 +147,8 @@ LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) // CORE of the fuzz harness (void)Frame::create(&code, lasti); - // TODO: Call more internal functions to trigger more code paths - // Possible ideas: + // TODO: Call more internal functions to trigger more code paths + // Possible ideas: // - MirrorSet::* // - StackChunk::* // - ThreadInfo::* @@ -184,5 +184,3 @@ main(int argc, char** argv) return 0; } #endif - - diff --git a/docker/Dockerfile.fuzz 
b/docker/Dockerfile.fuzz index 00f29465b9f..4d8ae7265f2 100644 --- a/docker/Dockerfile.fuzz +++ b/docker/Dockerfile.fuzz @@ -21,23 +21,21 @@ RUN apt-get update \ ninja-build \ python3 \ python3-dev \ - && rm -rf /var/lib/apt/lists/* + python3-pip \ + curl \ + unzip \ + && rm -rf /var/lib/apt/lists/* \ + && python3 -m pip install requests --break-system-packages + +RUN VAULT_VERSION=1.21.1 && curl -fsSL "https://releases.hashicorp.com/vault/${VAULT_VERSION}/vault_${VAULT_VERSION}_linux_amd64.zip" -o vault.zip && unzip vault.zip && mv vault /usr/local/bin/vault && rm vault.zip && chmod +x /usr/local/bin/vault WORKDIR /src COPY . /src -# Build only the fuzz target; skip the Python extension (avoids libdd_wrapper dependency). -RUN cmake -S ddtrace/internal/datadog/profiling/stack_v2 -B /build \ - -DBUILD_FUZZING=ON -DBUILD_TESTING=OFF -DSTACKV2_USE_LIBFUZZER=ON \ - -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ \ - -DCMAKE_BUILD_TYPE=RelWithDebInfo \ - -DCMAKE_C_FLAGS="-O1 -g -fno-omit-frame-pointer -fsanitize=address,undefined" \ - -DCMAKE_CXX_FLAGS="-O1 -g -fno-omit-frame-pointer -fsanitize=address,undefined" \ - -DCMAKE_EXE_LINKER_FLAGS="-fsanitize=address,undefined" \ - && cmake --build /build -j --target fuzz_echion_remote_read +RUN /src/ddtrace/internal/datadog/profiling/stack/fuzz/build.sh # RUN mkdir -p /fuzz/corpus /fuzz/out -CMD ["/build/fuzz/fuzz_echion_remote_read", "/fuzz/", "-artifact_prefix=/fuzz/"] +CMD ["/tmp/fuzz/build/fuzz/fuzz_echion_remote_read", "/tmp/fuzz/", "-artifact_prefix=/tmp/fuzz/"] From 7baa02e3f465a5e2a34da2ff81145287c8e91f5c Mon Sep 17 00:00:00 2001 From: Edouard Schweisguth Date: Wed, 14 Jan 2026 17:14:52 +0100 Subject: [PATCH 19/35] fix rebase errors --- .../internal/datadog/profiling/stack/echion/echion/vm.h | 2 +- .../internal/datadog/profiling/stack/fuzz/CMakeLists.txt | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/ddtrace/internal/datadog/profiling/stack/echion/echion/vm.h 
b/ddtrace/internal/datadog/profiling/stack/echion/echion/vm.h index 5fc07fda976..ea0d818954b 100644 --- a/ddtrace/internal/datadog/profiling/stack/echion/echion/vm.h +++ b/ddtrace/internal/datadog/profiling/stack/echion/echion/vm.h @@ -167,7 +167,7 @@ init_safe_copy() extern "C" int echion_fuzz_copy_memory(proc_ref_t proc_ref, const void* addr, ssize_t len, void* buf); -int +inline int copy_memory(proc_ref_t proc_ref, const void* addr, ssize_t len, void* buf) { return echion_fuzz_copy_memory(proc_ref, addr, len, buf); diff --git a/ddtrace/internal/datadog/profiling/stack/fuzz/CMakeLists.txt b/ddtrace/internal/datadog/profiling/stack/fuzz/CMakeLists.txt index 73f268bc3da..71ac8ca6071 100644 --- a/ddtrace/internal/datadog/profiling/stack/fuzz/CMakeLists.txt +++ b/ddtrace/internal/datadog/profiling/stack/fuzz/CMakeLists.txt @@ -6,7 +6,13 @@ cmake_minimum_required(VERSION 3.19) option(STACK_USE_LIBFUZZER "Link fuzz targets with libFuzzer (-fsanitize=fuzzer)" OFF) -add_executable(fuzz_echion_remote_read fuzz_echion_remote_read.cpp ../src/echion/frame.cc ../src/echion/danger.cc) +add_executable(fuzz_echion_remote_read + fuzz_echion_remote_read.cpp + ../src/echion/frame.cc + ../src/echion/danger.cc + ../src/echion/stack_chunk.cc + ../src/echion/strings.cc +) target_include_directories(fuzz_echion_remote_read PRIVATE ../include) target_include_directories(fuzz_echion_remote_read SYSTEM PRIVATE ${Python3_INCLUDE_DIRS} ../echion ../include/vendored From 08f708b3377f763cf172743aaf2ac883fba22e68 Mon Sep 17 00:00:00 2001 From: Edouard Schweisguth Date: Wed, 14 Jan 2026 17:18:11 +0100 Subject: [PATCH 20/35] long.cc ? 
--- ddtrace/internal/datadog/profiling/stack/fuzz/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/ddtrace/internal/datadog/profiling/stack/fuzz/CMakeLists.txt b/ddtrace/internal/datadog/profiling/stack/fuzz/CMakeLists.txt index 71ac8ca6071..da9091f79ff 100644 --- a/ddtrace/internal/datadog/profiling/stack/fuzz/CMakeLists.txt +++ b/ddtrace/internal/datadog/profiling/stack/fuzz/CMakeLists.txt @@ -12,6 +12,7 @@ add_executable(fuzz_echion_remote_read ../src/echion/danger.cc ../src/echion/stack_chunk.cc ../src/echion/strings.cc + ../src/echion/long.cc ) target_include_directories(fuzz_echion_remote_read PRIVATE ../include) From cd1e2aacde905b18ae7eb9207c6e701be7093969 Mon Sep 17 00:00:00 2001 From: Edouard Schweisguth Date: Wed, 14 Jan 2026 17:28:40 +0100 Subject: [PATCH 21/35] linter --- .gitlab/scripts/fuzz_infra.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab/scripts/fuzz_infra.py b/.gitlab/scripts/fuzz_infra.py index 85fc605babe..5342b0f9199 100755 --- a/.gitlab/scripts/fuzz_infra.py +++ b/.gitlab/scripts/fuzz_infra.py @@ -3,7 +3,7 @@ # This script enables "0 click onboarding" for new fuzzer in the dd-trace-py repository. # This means that any new fuzzer should be automatically detected and run in the internal infrastructure with enrichments, reporting, triaging, auto fix etc.. 
# Reports are submitted via Slack, with the channel defined by SLACK_CHANNEL -# +# # Requirements: # # This scripts assumes that: From 72ca1174bc1952ef1316a662f1b86fb2a42110c8 Mon Sep 17 00:00:00 2001 From: Edouard Schweisguth Date: Wed, 14 Jan 2026 17:44:36 +0100 Subject: [PATCH 22/35] format --- .gitlab/scripts/fuzz_infra.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/.gitlab/scripts/fuzz_infra.py b/.gitlab/scripts/fuzz_infra.py index 5342b0f9199..7a4eba4ce35 100755 --- a/.gitlab/scripts/fuzz_infra.py +++ b/.gitlab/scripts/fuzz_infra.py @@ -1,14 +1,17 @@ #!/usr/bin/env python3 # This script enables "0 click onboarding" for new fuzzer in the dd-trace-py repository. -# This means that any new fuzzer should be automatically detected and run in the internal infrastructure with enrichments, reporting, triaging, auto fix etc.. +# This means that any new fuzzer should be automatically detected and run in the internal +# infrastructure with enrichments, reporting, triaging, auto fix etc... # Reports are submitted via Slack, with the channel defined by SLACK_CHANNEL # # Requirements: # # This scripts assumes that: -# - Each fuzz target is built in a separate build directory named `fuzz` and having a `build.sh` script that builds the target. -# - The build script appends the path to the built binary to a "MANIFEST_FILE", allowing the discovery of each fuzz target by the script. +# - Each fuzz target is built in a separate build directory named `fuzz` and having a `build.sh` script that builds +# the target. +# - The build script appends the path to the built binary to a "MANIFEST_FILE", allowing the discovery of each fuzz +# target by the script. 
from __future__ import annotations @@ -21,6 +24,7 @@ import requests + # TODO: replace me to dd-trace-py ops' slack channel once initial onboarding is done SLACK_CHANNEL = "fuzzing-ops" TEAM_NAME = "profiling-python" From 2f4266e79649f803506434b1431b878ce4846b9d Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Tue, 20 Jan 2026 13:00:55 -0500 Subject: [PATCH 23/35] format CMakeLists.txt --- .../profiling/stack/fuzz/CMakeLists.txt | 21 ++++++------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/ddtrace/internal/datadog/profiling/stack/fuzz/CMakeLists.txt b/ddtrace/internal/datadog/profiling/stack/fuzz/CMakeLists.txt index da9091f79ff..fda0423c2c3 100644 --- a/ddtrace/internal/datadog/profiling/stack/fuzz/CMakeLists.txt +++ b/ddtrace/internal/datadog/profiling/stack/fuzz/CMakeLists.txt @@ -1,23 +1,16 @@ cmake_minimum_required(VERSION 3.19) -# Fuzz targets are built only when stack/CMakeLists.txt has BUILD_FUZZING=ON. -# The caller is expected to provide compiler/linker flags for libFuzzer -# (e.g. -fsanitize=fuzzer,address,undefined). - +# Fuzz targets are built only when stack/CMakeLists.txt has BUILD_FUZZING=ON. The caller is expected to provide +# compiler/linker flags for libFuzzer (e.g. -fsanitize=fuzzer,address,undefined). 
option(STACK_USE_LIBFUZZER "Link fuzz targets with libFuzzer (-fsanitize=fuzzer)" OFF) -add_executable(fuzz_echion_remote_read - fuzz_echion_remote_read.cpp - ../src/echion/frame.cc - ../src/echion/danger.cc - ../src/echion/stack_chunk.cc - ../src/echion/strings.cc - ../src/echion/long.cc -) +add_executable( + fuzz_echion_remote_read ../src/echion/danger.cc ../src/echion/frame.cc ../src/echion/long.cc + ../src/echion/stack_chunk.cc ../src/echion/strings.cc fuzz_echion_remote_read.cpp) target_include_directories(fuzz_echion_remote_read PRIVATE ../include) target_include_directories(fuzz_echion_remote_read SYSTEM PRIVATE ${Python3_INCLUDE_DIRS} ../echion ../include/vendored - ../include/util) + ../include/util) # Ensure echion headers take the fuzz hook in vm.h target_compile_definitions(fuzz_echion_remote_read PRIVATE ECHION_FUZZING) @@ -42,5 +35,3 @@ add_ddup_config(fuzz_echion_remote_read) if(Python3_LIBRARIES) target_link_libraries(fuzz_echion_remote_read PRIVATE ${Python3_LIBRARIES}) endif() - - From 4c7af01649f9fc275a2bbe1b795e7af850dcf71f Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Tue, 20 Jan 2026 13:26:19 -0500 Subject: [PATCH 24/35] add all echion cc files --- .../datadog/profiling/stack/fuzz/CMakeLists.txt | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/ddtrace/internal/datadog/profiling/stack/fuzz/CMakeLists.txt b/ddtrace/internal/datadog/profiling/stack/fuzz/CMakeLists.txt index fda0423c2c3..911405eed09 100644 --- a/ddtrace/internal/datadog/profiling/stack/fuzz/CMakeLists.txt +++ b/ddtrace/internal/datadog/profiling/stack/fuzz/CMakeLists.txt @@ -5,8 +5,21 @@ cmake_minimum_required(VERSION 3.19) option(STACK_USE_LIBFUZZER "Link fuzz targets with libFuzzer (-fsanitize=fuzzer)" OFF) add_executable( - fuzz_echion_remote_read ../src/echion/danger.cc ../src/echion/frame.cc ../src/echion/long.cc - ../src/echion/stack_chunk.cc ../src/echion/strings.cc fuzz_echion_remote_read.cpp) + fuzz_echion_remote_read + 
../src/echion/danger.cc + ../src/echion/frame.cc + ../src/echion/greenlets.cc + ../src/echion/interp.cc + ../src/echion/long.cc + ../src/echion/mirrors.cc + ../src/echion/stack_chunk.cc + ../src/echion/stacks.cc + ../src/echion/strings.cc + ../src/echion/tasks.cc + ../src/echion/threads.cc + ../src/echion/timing.cc + ../src/echion/vm.cc + fuzz_echion_remote_read.cpp) target_include_directories(fuzz_echion_remote_read PRIVATE ../include) target_include_directories(fuzz_echion_remote_read SYSTEM PRIVATE ${Python3_INCLUDE_DIRS} ../echion ../include/vendored From 94414ffabbe332096de3697dc12c275b33f64e32 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Tue, 20 Jan 2026 13:34:49 -0500 Subject: [PATCH 25/35] vm.cc needs to be removed as that defines copy_memory again, leading to redefinition error --- ddtrace/internal/datadog/profiling/stack/fuzz/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/ddtrace/internal/datadog/profiling/stack/fuzz/CMakeLists.txt b/ddtrace/internal/datadog/profiling/stack/fuzz/CMakeLists.txt index 911405eed09..cde61bb8208 100644 --- a/ddtrace/internal/datadog/profiling/stack/fuzz/CMakeLists.txt +++ b/ddtrace/internal/datadog/profiling/stack/fuzz/CMakeLists.txt @@ -18,7 +18,6 @@ add_executable( ../src/echion/tasks.cc ../src/echion/threads.cc ../src/echion/timing.cc - ../src/echion/vm.cc fuzz_echion_remote_read.cpp) target_include_directories(fuzz_echion_remote_read PRIVATE ../include) From 23ca79d770000083d3065e5001cbb04cd1ee3c98 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Tue, 20 Jan 2026 13:45:02 -0500 Subject: [PATCH 26/35] Add a comment on Python version --- .gitlab/fuzz.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitlab/fuzz.yml b/.gitlab/fuzz.yml index 7f22bf4c7b0..a495965ed75 100644 --- a/.gitlab/fuzz.yml +++ b/.gitlab/fuzz.yml @@ -20,6 +20,10 @@ fuzz_infra: allow_failure: true before_script: # Install build dependencies (same as docker/Dockerfile.fuzz) + # TODO(taegyunkim): Fuzz with all supported versions 
of Python (3.9 - 3.14). + # On ubuntu:24.04 image, python3 version defaults to 3.12.3, meaning that + # fuzzing will only run for binary that is linked with that version of + # Python. - apt-get update && apt-get install -y --no-install-recommends ca-certificates clang cmake git libclang-rt-dev lld make ninja-build python3 python3-dev python3-pip curl unzip - python3 -m pip install requests --break-system-packages # Install vault for fuzzing API authentication From db106eac3bc5ff3ca99632846ef2b03b2b96e2f8 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Tue, 20 Jan 2026 13:46:39 -0500 Subject: [PATCH 27/35] Add a comment on base image and using the same image as in .gitlab/fuzz.yml --- docker/Dockerfile.fuzz | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docker/Dockerfile.fuzz b/docker/Dockerfile.fuzz index 4d8ae7265f2..9e5dbb04233 100644 --- a/docker/Dockerfile.fuzz +++ b/docker/Dockerfile.fuzz @@ -2,9 +2,11 @@ # # Build: # $ docker build -f docker/Dockerfile.fuzz -t ddtrace-py-stackv2-fuzz . -# Run: +# Run: # $ docker run --rm -it -v "$PWD/.fuzz:/fuzz" ddtrace-py-stackv2-fuzz +# TODO(taegyunkim): Add this image to Datadog/images, and update +# .gitlab/fuzz.yml to use the same images. FROM debian:trixie-slim ENV DEBIAN_FRONTEND=noninteractive From 3064e5522f03bf1bb305849a74bcd8a0d4a96ccc Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Tue, 20 Jan 2026 14:05:31 -0500 Subject: [PATCH 28/35] add a docs section on fuzzing --- docs/contributing-fuzzing.rst | 331 ++++++++++++++++++++++++++++++++++ docs/contributing.rst | 3 + docs/spelling_wordlist.txt | 9 + 3 files changed, 343 insertions(+) create mode 100644 docs/contributing-fuzzing.rst diff --git a/docs/contributing-fuzzing.rst b/docs/contributing-fuzzing.rst new file mode 100644 index 00000000000..330a54a92ef --- /dev/null +++ b/docs/contributing-fuzzing.rst @@ -0,0 +1,331 @@ +.. 
_fuzzing_guidelines: + +Fuzzing Native Code +=================== + +This document describes how to add fuzzing harnesses for native C/C++ code in dd-trace-py. + +What is Fuzzing? +---------------- + +Fuzzing is an automated testing technique that feeds random or mutated inputs to code to discover +bugs, crashes, and security vulnerabilities. For native C/C++ code, fuzzing can detect: + +* Buffer overflows and memory corruption +* Use-after-free bugs +* Integer overflows +* Null pointer dereferences +* Undefined behavior + +dd-trace-py uses **libFuzzer** with **AddressSanitizer (ASAN)** and **UndefinedBehaviorSanitizer (UBSAN)** +to continuously test native code components. + +Fuzzing Infrastructure Overview +-------------------------------- + +The repository has a "0 click onboarding" fuzzing infrastructure that automatically discovers, +builds, uploads, and runs fuzzing harnesses. + +**How it works:** + +1. **Discovery**: CI recursively searches for ``**/fuzz/build.sh`` files anywhere in the repository +2. **Build**: Each discovered ``build.sh`` script is executed to compile fuzzing binaries +3. **Registration**: Built binaries are uploaded to Datadog's internal fuzzing platform +4. **Continuous Fuzzing**: Binaries run continuously with crash reporting to Slack (``#fuzzing-ops``) + +**Architecture**:: + + Repository + └── <component>/ + └── fuzz/ + ├── build.sh # Build script (auto-discovered) + ├── fuzz_*.cpp # Fuzzing harness + └── CMakeLists.txt # Build configuration + + CI Pipeline (.gitlab/fuzz.yml): + 1. Discover: glob.glob("**/fuzz/build.sh") + 2. Build: Execute each build.sh + 3. Collect: Read /tmp/fuzz/build/fuzz_binaries.txt + 4. Upload: POST binaries to fuzzing API + 5. Register: Create continuous fuzzer + 6. Report: Crashes sent to Slack + +Adding a New Fuzzing Harness +----------------------------- + +1. Create Directory Structure +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Create a ``fuzz/`` subdirectory in your component: + +..
code-block:: bash + + $ mkdir -p path/to/your/component/fuzz/ + $ cd path/to/your/component/fuzz/ + +2. Write Your Fuzzing Harness +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Create a C/C++ file implementing the libFuzzer interface: + +.. code-block:: cpp + + // fuzz_your_component.cpp + #include <cstddef> + #include <cstdint> + #include "your_component.h" // Your code to test + + extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { + if (size == 0) { + return 0; + } + + // Call your code with fuzzer-generated input + your_function_to_test(data, size); + + return 0; // Continue fuzzing + } + +**Key points:** + +* Implement ``LLVMFuzzerTestOneInput(const uint8_t* data, size_t size)`` +* Return 0 to continue fuzzing +* Keep the harness simple - let sanitizers catch bugs +* Handle empty inputs gracefully + +3. Create CMakeLists.txt +~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: cmake + + cmake_minimum_required(VERSION 3.19) + + add_executable(fuzz_your_component + fuzz_your_component.cpp + ../src/your_source.c + ) + + target_include_directories(fuzz_your_component PRIVATE ../include) + + if(STACK_USE_LIBFUZZER) + target_compile_options(fuzz_your_component PRIVATE + -fsanitize=fuzzer,address,undefined + -fno-omit-frame-pointer + ) + target_link_options(fuzz_your_component PRIVATE + -fsanitize=fuzzer,address,undefined + ) + endif() + +4. Create build.sh Script +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Create an executable ``build.sh``: + +.. code-block:: bash + + #!/bin/bash + set -e + + TARGET=fuzz_your_component + BUILD_DIR=/tmp/fuzz/build/your_component # Use unique subdirectory + MANIFEST_FILE=/tmp/fuzz/build/fuzz_binaries.txt + + SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + SOURCE_DIR="$(cd "${SCRIPT_DIR}/.."
&& pwd)" + + cmake -S "${SCRIPT_DIR}" -B "${BUILD_DIR}" \ + -DSTACK_USE_LIBFUZZER=ON \ + -DCMAKE_C_COMPILER=clang \ + -DCMAKE_CXX_COMPILER=clang++ \ + -DCMAKE_BUILD_TYPE=RelWithDebInfo \ + -DCMAKE_C_FLAGS="-O1 -g -fsanitize=address,undefined" \ + -DCMAKE_CXX_FLAGS="-O1 -g -fsanitize=address,undefined" \ + && cmake --build "${BUILD_DIR}" -j --target $TARGET + + # Register binary in manifest (REQUIRED) + BINARY_PATH="${BUILD_DIR}/${TARGET}" + if [ -x "${BINARY_PATH}" ]; then + echo "${BINARY_PATH}" >> "${MANIFEST_FILE}" + echo "✅ Registered binary: ${BINARY_PATH}" + else + echo "❌ Binary not found: ${BINARY_PATH}" + exit 1 + fi + +**Make executable:** ``chmod +x build.sh`` + +**Critical requirements:** + +* Script must be named exactly ``build.sh`` +* Must append binary path to ``/tmp/fuzz/build/fuzz_binaries.txt`` +* Use a unique ``BUILD_DIR`` subdirectory +* Exit with non-zero status if build fails + +5. Test Locally +~~~~~~~~~~~~~~~~ + +**Using Docker** (recommended): + +.. code-block:: bash + + $ docker build -f docker/Dockerfile.fuzz -t ddtrace-py-fuzz . + $ docker run --rm -it ddtrace-py-fuzz + +**Local build:** + +.. code-block:: bash + + $ cd path/to/your/component/fuzz/ + $ ./build.sh + $ /tmp/fuzz/build/your_component/fuzz_your_component -max_total_time=60 + +6. Commit and Push +~~~~~~~~~~~~~~~~~~ + +.. code-block:: bash + + $ git add path/to/your/component/fuzz/ + $ git commit -m "feat: add fuzzing for your component" + $ git push + +7. Trigger Fuzzing Job in CI +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The fuzzing job runs automatically during nightly builds but must be triggered manually on pull requests. + +**In your GitLab CI pipeline:** + +1. Go to your merge request's **Pipelines** tab +2. Find the ``fuzz_infra`` job (it will show as "manual" or have a play button) +3. Click the play button (▶) to trigger the job + +This builds your fuzzer, uploads it to the fuzzing platform, and verifies it works correctly. 
+ +**After merging to main**, the fuzzer runs automatically during nightly builds and continuously in the background. + +Example: Existing Fuzzer +------------------------- + +See the profiling stack sampler fuzzer for a complete example: + +.. code-block:: text + + ddtrace/internal/datadog/profiling/stack/fuzz/ + ├── build.sh + ├── fuzz_echion_remote_read.cpp + └── CMakeLists.txt + +This fuzzer tests echion's ability to parse Python stack frames from remote processes. + +Advanced: Testing Remote Process Memory Reads +---------------------------------------------- + +For code that reads memory from remote processes (like echion), use conditional compilation +to replace the real memory read function with a mock: + +.. code-block:: cpp + + // In your header file (e.g., vm.h) + #if defined(YOUR_COMPONENT_FUZZING) + extern "C" int your_fuzz_copy_memory(pid_t pid, void* addr, + size_t len, void* buf); + #define copy_memory your_fuzz_copy_memory + #else + int copy_memory(pid_t pid, void* addr, size_t len, void* buf); + #endif + + // In your fuzzer harness + static thread_local const uint8_t* g_data = nullptr; + static thread_local size_t g_size = 0; + + extern "C" int your_fuzz_copy_memory(pid_t pid, void* addr, + size_t len, void* buf) { + // Serve fuzzer input bytes as "fake remote memory" + // ... bounds checking ... + memcpy(buf, g_data + offset, len); + return 0; + } + +See ``ddtrace/internal/datadog/profiling/stack/fuzz/fuzz_echion_remote_read.cpp`` for a complete example.
+ +Common Build Options +-------------------- + +**Compiler flags:** + +``-O1`` + Light optimization for reasonable performance while preserving debuggability + +``-g`` + Include debug symbols for better crash reports + +``-fno-omit-frame-pointer`` + Required for accurate ASAN stack traces + +``-fsanitize=fuzzer`` + Enable libFuzzer instrumentation + +``-fsanitize=address`` + Enable AddressSanitizer for memory error detection + +``-fsanitize=undefined`` + Enable UndefinedBehaviorSanitizer + +**libFuzzer runtime options:** + +.. code-block:: bash + + $ ./fuzzer corpus/ -max_total_time=60 -max_len=4096 -jobs=4 + +``-max_total_time=N`` + Run for N seconds then exit + +``-max_len=N`` + Limit input size to N bytes + +``-jobs=N`` + Run N parallel fuzzing jobs + +``-artifact_prefix=path/`` + Store crash artifacts in this directory + +Current Limitations +------------------- + +**Single Python Version** + Fuzzing currently only runs on Python 3.12.3, despite dd-trace-py supporting Python 3.9-3.14. + Bugs in version-specific code paths may not be discovered. + +Resources and References +------------------------ + +**libFuzzer documentation:** + https://llvm.org/docs/LibFuzzer.html + +**AddressSanitizer:** + https://clang.llvm.org/docs/AddressSanitizer.html + +**UndefinedBehaviorSanitizer:** + https://clang.llvm.org/docs/UndefinedBehaviorSanitizer.html + +**Fuzzing best practices:** + https://github.com/google/fuzzing/blob/master/docs/good-fuzz-target.md + +**Example fuzzer in this repository:** + ``ddtrace/internal/datadog/profiling/stack/fuzz/fuzz_echion_remote_read.cpp`` + +**Crash reports:** + Check ``#fuzzing-ops`` Slack channel + +Quick Start Checklist +--------------------- + +1. ☐ Create ``fuzz/`` directory in your component +2. ☐ Write ``fuzz_*.cpp`` implementing ``LLVMFuzzerTestOneInput()`` +3. ☐ Create ``CMakeLists.txt`` with fuzzer build configuration +4. ☐ Create executable ``build.sh`` that builds and registers binary +5. 
☐ Test locally with Docker or manual build +6. ☐ Commit and push +7. ☐ Manually trigger ``fuzz_infra`` job in GitLab CI (on pull requests) +8. ☐ Monitor ``#fuzzing-ops`` for crash reports diff --git a/docs/contributing.rst b/docs/contributing.rst index 91585eb953a..6618b914edf 100644 --- a/docs/contributing.rst +++ b/docs/contributing.rst @@ -16,6 +16,8 @@ If you're trying to set up a local development environment, read `this `_. +`Fuzzing native code documentation for contributors `_. + Thanks for working with us! .. _change_process: @@ -161,6 +163,7 @@ about Instrumentation Telemetry. contributing-design contributing-integrations contributing-testing + contributing-fuzzing contributing-tracing contributing-release releasenotes diff --git a/docs/spelling_wordlist.txt b/docs/spelling_wordlist.txt index 13c8384e5ad..6f7ba029f0f 100644 --- a/docs/spelling_wordlist.txt +++ b/docs/spelling_wordlist.txt @@ -20,6 +20,7 @@ appsec AppSec aredis args +ASAN ascii asgi asm @@ -120,6 +121,9 @@ flamegraph fnmatch formatter freezegun +fuzzer +fuzzers +fuzzing gdb genai generativeai @@ -135,6 +139,8 @@ grpc gRPC gunicorn Gunicorn +harness +harnesses hostname hostnames hotspot @@ -168,8 +174,10 @@ kwarg kwargs langchain langchain_community +libclang libdatadog libddwaf +libFuzzer lifecycle linters litellm @@ -313,6 +321,7 @@ tokenizer tracecontext tracestate tweens +UBSAN # docs/configuration.rst uest unbuffered From e0cc5aad6d64c08770e833a1791ea496f163ff56 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Tue, 20 Jan 2026 14:10:40 -0500 Subject: [PATCH 29/35] Set owners for fuzzing related files --- .github/CODEOWNERS | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index d133481191c..8a054562e4e 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -285,3 +285,8 @@ tests/contrib/**/test_*dsm.py @DataDog/data-streams- tests/**/*appsec* @DataDog/asm-python tests/**/*iast* @DataDog/asm-python tests/tracer/test_propagation.py 
@DataDog/apm-sdk-capabilities-python @DataDog/asm-python + +# Fuzzing +.gitlab/fuzz.yml @DataDog/chaos-platform @DataDog/profiling-python +.gitlab/scripts/fuzz_infra.py @DataDog/chaos-platform @DataDog/profiling-python +docker/Dockerfile.fuzz @DataDog/chaos-platform @DataDog/profiling-python From 42ad6a1eed6734a40c19f2cdb76f44e41db4c8cb Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Tue, 20 Jan 2026 14:12:05 -0500 Subject: [PATCH 30/35] Add datadog internal docs link --- docs/contributing-fuzzing.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/contributing-fuzzing.rst b/docs/contributing-fuzzing.rst index 330a54a92ef..10a864fbffe 100644 --- a/docs/contributing-fuzzing.rst +++ b/docs/contributing-fuzzing.rst @@ -300,6 +300,9 @@ Current Limitations Resources and References ------------------------ +**Datadog Fuzzing Platform (internal):** + https://datadoghq.atlassian.net/wiki/spaces/RESENG/pages/2147976713/Fuzzing + **libFuzzer documentation:** https://llvm.org/docs/LibFuzzer.html From 7256187bd0d9118ebcabd030e5aa679c693dc046 Mon Sep 17 00:00:00 2001 From: Edouard Schweisguth Date: Wed, 21 Jan 2026 10:19:44 +0100 Subject: [PATCH 31/35] Add spelling wordlist --- docs/spelling_wordlist.txt | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/docs/spelling_wordlist.txt b/docs/spelling_wordlist.txt index 6f7ba029f0f..64bff0926f2 100644 --- a/docs/spelling_wordlist.txt +++ b/docs/spelling_wordlist.txt @@ -4,8 +4,8 @@ adk agentless aiobotocore aiohttp -aiomysql aiokafka +aiomysql aiopg aioredis algolia @@ -20,6 +20,7 @@ appsec AppSec aredis args +ARN ASAN ascii asgi @@ -39,7 +40,6 @@ avro Avro aws AWS -ARN backend backends backoff @@ -69,13 +69,13 @@ composable config contextvar contextvars +contrib coroutine coroutines CPU CPython CUPTI Cython -contrib datadog datadoghq dataset @@ -84,8 +84,10 @@ datastores dbapi ddtrace deallocating +debuggability decompiling deprecations +dereferences DES deserializing django @@ -207,6 +209,7 @@ 
mysql MySQL mysqlclient mysqldb +onboarding # tests/contrib/openai/test_openai_v1.py Nam namespace From 60e0dd821a4fead0b46e715687d72d57e3c3e02e Mon Sep 17 00:00:00 2001 From: Edouard Schweisguth Date: Wed, 21 Jan 2026 12:04:43 +0100 Subject: [PATCH 32/35] empty commit From 34c0817cdf40d38bb0dbfff5af3cf54770ea7ab5 Mon Sep 17 00:00:00 2001 From: Edouard Schweisguth Date: Thu, 22 Jan 2026 17:45:16 +0100 Subject: [PATCH 33/35] PR comments --- .gitlab-ci.yml | 2 +- .gitlab/fuzz.yml | 8 +++----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index b19f5fccb66..4b8ee4316c1 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,7 +1,7 @@ stages: - - fuzz - package - tests + - fuzz - shared-pipeline - benchmarks - release diff --git a/.gitlab/fuzz.yml b/.gitlab/fuzz.yml index a495965ed75..3f93c4cf6b7 100644 --- a/.gitlab/fuzz.yml +++ b/.gitlab/fuzz.yml @@ -3,21 +3,19 @@ variables: # CI_DEBUG_SERVICES: "true" fuzz_infra: + needs: [] image: name: registry.ddbuild.io/images/mirror/ubuntu:24.04 tags: ["arch:amd64"] stage: fuzz - timeout: 30m + timeout: 5m allow_failure: true rules: # runs on gitlab schedule and on merge to main. # Also allow manual run in branches for ease of debug / testing - if: '$CI_COMMIT_BRANCH == "main" && $CI_PIPELINE_SOURCE == "schedule"' - allow_failure: true - - if: $CI_COMMIT_BRANCH == "main" - allow_failure: true + - if: $NIGHTLY_BUILD == "true" - when: manual - allow_failure: true before_script: # Install build dependencies (same as docker/Dockerfile.fuzz) # TODO(taegyunkim): Fuzz with all supported versions of Python (3.9 - 3.14). From 0f3bf5a7c492fce9086ecef36ad36ea2646fcfab Mon Sep 17 00:00:00 2001 From: Edouard Schweisguth Date: Thu, 22 Jan 2026 17:50:11 +0100 Subject: [PATCH 34/35] bad merge dup values... 
--- .gitlab-ci.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index bf41de0c600..4b8ee4316c1 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,5 +1,4 @@ stages: - - fuzz - package - tests - fuzz From 51cb9307060c15f1486fc60c9c1120bbacc6d392 Mon Sep 17 00:00:00 2001 From: Edouard Schweisguth Date: Fri, 23 Jan 2026 10:21:22 +0100 Subject: [PATCH 35/35] Remove scheduled pipeline trigger, only use nightly + manual --- .gitlab/fuzz.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.gitlab/fuzz.yml b/.gitlab/fuzz.yml index 3f93c4cf6b7..c81fda81227 100644 --- a/.gitlab/fuzz.yml +++ b/.gitlab/fuzz.yml @@ -11,10 +11,9 @@ fuzz_infra: timeout: 5m allow_failure: true rules: - # runs on gitlab schedule and on merge to main. - # Also allow manual run in branches for ease of debug / testing - - if: '$CI_COMMIT_BRANCH == "main" && $CI_PIPELINE_SOURCE == "schedule"' + # runs during nightly builds - if: $NIGHTLY_BUILD == "true" + # Also allow manual run in branches for ease of debug / testing - when: manual before_script: # Install build dependencies (same as docker/Dockerfile.fuzz)