Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
a3985b8
WIP Fuzzer
edznux-dd Dec 15, 2025
d669a17
onboard to internal fuzzing infra
edznux-dd Dec 17, 2025
7b82628
fix rebase errors
edznux-dd Jan 14, 2026
aa636fa
long.cc ?
edznux-dd Jan 14, 2026
88f1c5a
linter
edznux-dd Jan 14, 2026
36d3fd8
format
edznux-dd Jan 14, 2026
0570e89
format CMakeLists.txt
taegyunkim Jan 20, 2026
8090d8c
Merge branch 'main' into edouard/add-base-fuzzing-setup
taegyunkim Jan 20, 2026
32b5f22
add all echion cc files
taegyunkim Jan 20, 2026
5aa5dd7
vm.cc needs to be removed as that defines copy_memory again, leading …
taegyunkim Jan 20, 2026
14117e0
Add a comment on Python version
taegyunkim Jan 20, 2026
4db133c
Add a comment on base image and using the same image as in .gitlab/fu…
taegyunkim Jan 20, 2026
4523978
add a docs section on fuzzing
taegyunkim Jan 20, 2026
d351f67
Set owners for fuzzing related files
taegyunkim Jan 20, 2026
92ba050
Add datadog internal docs link
taegyunkim Jan 20, 2026
2321f3f
Add spelling wordlist
edznux-dd Jan 21, 2026
f372282
empty commit
edznux-dd Jan 21, 2026
d8b12d9
WIP Fuzzer
edznux-dd Dec 15, 2025
d23e8b2
onboard to internal fuzzing infra
edznux-dd Dec 17, 2025
7baa02e
fix rebase errors
edznux-dd Jan 14, 2026
08f708b
long.cc ?
edznux-dd Jan 14, 2026
cd1e2aa
linter
edznux-dd Jan 14, 2026
72ca117
format
edznux-dd Jan 14, 2026
2f4266e
format CMakeLists.txt
taegyunkim Jan 20, 2026
4c7af01
add all echion cc files
taegyunkim Jan 20, 2026
94414ff
vm.cc needs to be removed as that defines copy_memory again, leading …
taegyunkim Jan 20, 2026
23ca79d
Add a comment on Python version
taegyunkim Jan 20, 2026
db106ea
Add a comment on base image and using the same image as in .gitlab/fu…
taegyunkim Jan 20, 2026
3064e55
add a docs section on fuzzing
taegyunkim Jan 20, 2026
e0cc5aa
Set owners for fuzzing related files
taegyunkim Jan 20, 2026
42ad6a1
Add datadog internal docs link
taegyunkim Jan 20, 2026
7256187
Add spelling wordlist
edznux-dd Jan 21, 2026
60e0dd8
empty commit
edznux-dd Jan 21, 2026
bf77a73
Merge branch 'main' into edouard/add-base-fuzzing-setup
edznux-dd Jan 21, 2026
34c0817
PR comments
edznux-dd Jan 22, 2026
47529f2
Merge branch 'edouard/add-base-fuzzing-setup' of github.com:DataDog/d…
edznux-dd Jan 22, 2026
0f3bf5a
bad merge dup values...
edznux-dd Jan 22, 2026
81180f3
Merge branch 'main' into edouard/add-base-fuzzing-setup
taegyunkim Jan 22, 2026
51cb930
Remove scheduled pipeline trigger, only use nightly + manual
edznux-dd Jan 23, 2026
c586598
Merge branch 'main' into edouard/add-base-fuzzing-setup
edznux-dd Jan 26, 2026
7db6d6e
Merge branch 'main' into edouard/add-base-fuzzing-setup
edznux-dd Jan 27, 2026
5c7739e
Merge branch 'main' into edouard/add-base-fuzzing-setup
edznux-dd Jan 27, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .github/CODEOWNERS
Original file line number Diff line number Diff line change
Expand Up @@ -285,3 +285,8 @@ tests/contrib/**/test_*dsm.py @DataDog/data-streams-
tests/**/*appsec* @DataDog/asm-python
tests/**/*iast* @DataDog/asm-python
tests/tracer/test_propagation.py @DataDog/apm-sdk-capabilities-python @DataDog/asm-python

# Fuzzing
.gitlab/fuzz.yml @DataDog/chaos-platform @DataDog/profiling-python
.gitlab/scripts/fuzz_infra.py @DataDog/chaos-platform @DataDog/profiling-python
docker/Dockerfile.fuzz @DataDog/chaos-platform @DataDog/profiling-python
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -209,3 +209,6 @@ tests/appsec/iast/fixtures/taint_sinks/not_exists.txt

# Rust build artifacts
src/native/target*

# Fuzzing corpus, output and artifacts
.fuzz/
2 changes: 2 additions & 0 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
stages:
- package
- tests
- fuzz
- shared-pipeline
- benchmarks
- release
Expand Down Expand Up @@ -53,6 +54,7 @@ include:
- local: ".gitlab/multi-os-tests.yml"
- local: ".gitlab/benchmarks/serverless.yml"
- local: ".gitlab/native.yml"
- local: ".gitlab/fuzz.yml"

tests-gen:
stage: tests
Expand Down
30 changes: 30 additions & 0 deletions .gitlab/fuzz.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
variables:
REPO_LANG: python # "python" is used everywhere rather than "py"
# CI_DEBUG_SERVICES: "true"

# Runs .gitlab/scripts/fuzz_infra.py, which builds the fuzz harnesses found in the
# repository and registers them with the internal fuzzing platform.
fuzz_infra:
  # Empty list: the job does not wait on any other job.
  needs: []
  image:
    name: registry.ddbuild.io/images/mirror/ubuntu:24.04
  tags: ["arch:amd64"]
  stage: fuzz
  timeout: 5m
  # A failure here is reported but does not fail the pipeline.
  allow_failure: true
  rules:
    # runs during nightly builds
    - if: $NIGHTLY_BUILD == "true"
    # Also allow manual run in branches for ease of debug / testing
    - when: manual
  before_script:
    # Install build dependencies (same as docker/Dockerfile.fuzz)
    # TODO(taegyunkim): Fuzz with all supported versions of Python (3.9 - 3.14).
    # On ubuntu:24.04 image, python3 version defaults to 3.12.3, meaning that
    # fuzzing will only run for binary that is linked with that version of
    # Python.
    - apt-get update && apt-get install -y --no-install-recommends ca-certificates clang cmake git libclang-rt-dev lld make ninja-build python3 python3-dev python3-pip curl unzip
    # `requests` is the only third-party module imported by fuzz_infra.py.
    - python3 -m pip install requests --break-system-packages
    # Install vault for fuzzing API authentication
    - VAULT_VERSION=1.21.1 && curl -fsSL "https://releases.hashicorp.com/vault/${VAULT_VERSION}/vault_${VAULT_VERSION}_linux_amd64.zip" -o vault.zip && unzip vault.zip && mv vault /usr/local/bin/vault && rm vault.zip && chmod +x /usr/local/bin/vault
    # fuzz_infra.py calls `git rev-parse HEAD`; mark the checkout as safe so git accepts it.
    - git config --global --add safe.directory ${CI_PROJECT_DIR}
  script:
    - python3 .gitlab/scripts/fuzz_infra.py
247 changes: 247 additions & 0 deletions .gitlab/scripts/fuzz_infra.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,247 @@
#!/usr/bin/env python3

# This script enables "0 click onboarding" for new fuzzers in the dd-trace-py repository.
# This means that any new fuzzer should be automatically detected and run in the internal
# infrastructure with enrichments, reporting, triaging, auto-fix, etc.
# Reports are submitted via Slack, to the channel defined by SLACK_CHANNEL.
#
# Requirements:
#
# This script assumes that:
# - Each fuzz target lives in a directory named `fuzz` that contains a `build.sh` script which builds
#   the target.
# - The build script appends the path of each built binary to "MANIFEST_FILE", which is how this script
#   discovers the fuzz targets.

from __future__ import annotations

from dataclasses import dataclass
import glob
import os
import subprocess
import sys
from typing import List

import requests


# TODO: switch to the dd-trace-py ops Slack channel once initial onboarding is done
# Slack channel sent to the fuzzing API as "slack_channel" when a fuzzer is created.
SLACK_CHANNEL = "fuzzing-ops"
# Team name sent to the fuzzing API as "team".
TEAM_NAME = "profiling-python"
# Repository URL sent to the fuzzing API as "repository_url".
REPOSITORY_URL = "https://github.com/DataDog/dd-trace-py"
# Prefix of every package name produced by get_package_name().
PROJECT_NAME = "dd-trace-py"
# We currently only support libfuzzer for this repository.
FUZZ_TYPE = "libfuzzer"
# Base URL of the fuzzing platform REST API.
API_URL = "https://fuzzing-api.us1.ddbuild.io/api/v1"

# Paths and constants for script execution
# Two directories above this file (the script lives in .gitlab/scripts/).
REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
# Directory that holds the manifest of built fuzz binaries.
FUZZER_BINARY_BASE_PATH = "/tmp/fuzz/build"
# One binary path per line; read back by read_manifest().
MANIFEST_FILE = os.path.join(FUZZER_BINARY_BASE_PATH, "fuzz_binaries.txt")
# Number of binary-name characters kept by get_package_name().
MAX_PKG_NAME_LENGTH = 50
# Vault executable used by get_headers() to obtain the API token.
VAULT_PATH = "vault"


@dataclass(frozen=True)
class FuzzBinary:
    """Represents a built fuzz binary ready for upload."""

    # Application name on the fuzzing platform, as produced by get_package_name().
    pkgname: str
    # File name of the binary (basename of binary_path).
    binary_name: str
    # Path of the binary on disk, as listed in the manifest file.
    binary_path: str


def build_and_upload_fuzz(
    team: str = TEAM_NAME,
    slack_channel: str = SLACK_CHANNEL,
    repository_url: str = REPOSITORY_URL,
) -> None:
    """
    Build every discovered fuzz harness, upload the binaries and start one fuzzer per binary.

    Args:
        team: Team name forwarded to the fuzzing platform.
        slack_channel: Slack channel forwarded to the fuzzing platform.
        repository_url: Repository URL forwarded to the fuzzing platform.

    Raises:
        subprocess.CalledProcessError: If the git revision cannot be resolved or a build script fails.
        RuntimeError: If no build script or no binary is found, or if at least one binary could
            not be uploaded or started.
    """
    # check=True: fail loudly rather than continue with an empty version string when git fails.
    git_sha = subprocess.run(
        ["git", "rev-parse", "HEAD"],
        check=True,
        stdout=subprocess.PIPE,
        text=True,
    ).stdout.strip()

    # Step 1: Discover and run all build scripts
    build_scripts = discover_build_scripts(REPO_ROOT)
    if not build_scripts:
        # Raise instead of returning so the caller does not report success.
        raise RuntimeError(f"No fuzz build scripts found under {REPO_ROOT}")

    # Clear any previous manifest file
    if os.path.exists(MANIFEST_FILE):
        os.remove(MANIFEST_FILE)

    for build_script in build_scripts:
        run_build_script(build_script)

    # Step 2: Read the manifest file to discover built binaries
    binaries = read_manifest(MANIFEST_FILE)
    if not binaries:
        raise RuntimeError(f"No fuzz binaries found in manifest {MANIFEST_FILE}")

    # Step 3: Upload and create a fuzzer for each binary.
    # upload_binary() and create_fuzzer() return True on FAILURE. Keep going so that one broken
    # binary does not prevent the others from being onboarded, but remember what failed.
    failed: List[str] = []
    for binary in binaries:
        if upload_binary(binary, git_sha):
            # Nothing was uploaded for this revision, so there is no build to start a fuzzer on.
            failed.append(binary.binary_name)
            continue
        if create_fuzzer(binary, git_sha, team, slack_channel, repository_url):
            failed.append(binary.binary_name)

    if failed:
        raise RuntimeError(
            f"{len(failed)} of {len(binaries)} fuzz binaries could not be uploaded or started: " + ", ".join(failed)
        )


def get_package_name(binary_name: str) -> str:
    """
    Generate a package name for the fuzzing platform from a binary name.

    The name is prefixed with the repository name so it's easier to filter.
    It is limited by the k8s label format: fewer than 63 characters, alphanumeric
    characters and hyphens only, and it must not end with a hyphen.

    Args:
        binary_name: File name of the fuzz binary.

    Returns:
        PROJECT_NAME, a hyphen, and at most MAX_PKG_NAME_LENGTH characters of ``binary_name``
        in which every character that is not an ASCII letter or digit is replaced by a hyphen.
        Trailing hyphens are removed.
    """
    truncated = binary_name[:MAX_PKG_NAME_LENGTH]
    sanitized = "".join(ch if (ch.isascii() and ch.isalnum()) else "-" for ch in truncated)
    # Truncation or replacement can leave a hyphen at the end, which a label value may not have.
    return (PROJECT_NAME + "-" + sanitized).rstrip("-")


def _is_executable(file_path: str) -> bool:
    """Return True only when ``file_path`` is a regular file the current user may execute."""
    if not os.path.isfile(file_path):
        return False
    return os.access(file_path, os.X_OK)


def discover_build_scripts(repo_root: str) -> List[str]:
    """
    Collect every ``build.sh`` that sits in a directory named ``fuzz`` below ``repo_root``.

    New fuzz harnesses are picked up without any registration step ("0 click onboarding").

    Args:
        repo_root: Directory from which the recursive search starts.

    Returns:
        The matching paths, in the order reported by ``glob``.
    """
    pattern = os.path.join(repo_root, "**/fuzz/build.sh")
    matches: List[str] = glob.glob(pattern, recursive=True)
    for script_path in matches:
        print(f"Found build script: {script_path}")
    return matches


def run_build_script(build_script: str) -> None:
    """
    Execute one fuzz build script from inside its own directory.

    The script's output is captured and printed once it has finished.

    Args:
        build_script: Path to the script to execute.

    Raises:
        FileNotFoundError: If ``build_script`` is not an existing file.
        subprocess.CalledProcessError: If the script exits with a non-zero status
            (its captured output is printed before the error is re-raised).
    """
    workdir = os.path.dirname(build_script)
    print(f"Building fuzz directory: {workdir}")

    if not os.path.isfile(build_script):
        raise FileNotFoundError(build_script)

    try:
        completed = subprocess.run(
            [build_script],
            cwd=workdir,
            check=True,
            capture_output=True,
            text=True,
        )
    except subprocess.CalledProcessError as err:
        print(f"❌ Build script failed with exit code {err.returncode}")
        print(f"Command: {err.cmd}")
        if err.stdout:
            print(f"stdout:\n{err.stdout}")
        if err.stderr:
            print(f"stderr:\n{err.stderr}")
        raise

    print(completed.stdout)
    if completed.stderr:
        print(completed.stderr)

    print(f"✅ Built fuzzers from {build_script}")


def read_manifest(manifest_path: str) -> List[FuzzBinary]:
    """
    Turn the manifest written by the build scripts into a list of uploadable binaries.

    The manifest holds one binary path per line. Blank lines are ignored. Paths that do not
    point to an existing, executable file are reported and skipped.

    Args:
        manifest_path: Path of the manifest file.

    Returns:
        One ``FuzzBinary`` per usable line, in manifest order. The list is empty when the
        manifest does not exist.
    """
    if not os.path.isfile(manifest_path):
        print(f"⚠️ No manifest file found at {manifest_path}")
        return []

    found: List[FuzzBinary] = []
    with open(manifest_path) as manifest:
        # filter(None, ...) drops the lines that are empty once stripped.
        for candidate in filter(None, map(str.strip, manifest)):
            if not os.path.isfile(candidate):
                print(f"⚠️ Binary listed in manifest not found: {candidate}")
                continue
            if not _is_executable(candidate):
                print(f"⚠️ Binary listed in manifest is not executable: {candidate}")
                continue

            name = os.path.basename(candidate)
            print(f"Found fuzz binary: {candidate}")
            entry = FuzzBinary(
                pkgname=get_package_name(name),
                binary_name=name,
                binary_path=candidate,
            )
            found.append(entry)

    return found


def create_fuzzer(binary: FuzzBinary, git_sha: str, team: str, slack_channel: str, repository_url: str) -> bool:
    """
    Ask the fuzzing platform to start a fuzzer for ``binary`` at revision ``git_sha``.

    Errors are printed, never raised.

    Args:
        binary: The fuzz binary to start a fuzzer for.
        git_sha: Sent to the platform as the build version.
        team: Sent to the platform as the owning team.
        slack_channel: Sent to the platform as the Slack channel.
        repository_url: Sent to the platform as the repository URL.

    Returns:
        True when the request FAILED, False when the fuzzer was started. Note that this is
        the opposite of the usual "True means success" convention.
    """
    print(f"Starting fuzzer for {binary.pkgname} ({binary.binary_name})...")
    endpoint = f"{API_URL}/apps/{binary.pkgname}/fuzzers"
    payload = dict(
        app=binary.pkgname,
        debug=False,
        version=git_sha,
        type=FUZZ_TYPE,
        binary=binary.binary_name,
        team=team,
        slack_channel=slack_channel,
        repository_url=repository_url,
    )
    try:
        reply = requests.post(endpoint, headers=get_headers(), json=payload, timeout=30)
        reply.raise_for_status()
        print(f"✅ Started fuzzer for {binary.pkgname} ({binary.binary_name})")
        print(reply.json())
        return False
    except Exception as exc:
        print(f"❌ Failed to start fuzzer for {binary.pkgname} ({binary.binary_name}): {exc}")
        return True


def upload_binary(binary: FuzzBinary, git_sha: str) -> bool:
    """
    Upload ``binary`` to the fuzzing platform as the build for revision ``git_sha``.

    The platform is first asked for a presigned URL, then the file is sent to that URL with
    an HTTP PUT. Errors are printed, never raised.

    Args:
        binary: The fuzz binary to upload.
        git_sha: Revision used as the build identifier in the API path.

    Returns:
        True when the upload FAILED, False when it succeeded. Note that this is the opposite
        of the usual "True means success" convention.
    """
    try:
        # Get presigned URL so we can use s3 uploading
        print(f"Getting presigned URL for {binary.pkgname} ({binary.binary_name})...")
        url_reply = requests.post(
            f"{API_URL}/apps/{binary.pkgname}/builds/{git_sha}/url",
            headers=get_headers(),
            timeout=30,
        )
        url_reply.raise_for_status()
        upload_url = url_reply.json()["data"]["url"]

        print(f"Uploading {binary.pkgname} ({binary.binary_name}) for {git_sha}...")
        with open(binary.binary_path, "rb") as binary_file:
            put_reply = requests.put(upload_url, data=binary_file, timeout=300)
            put_reply.raise_for_status()
            print(f"✅ Uploaded {binary.binary_name}")
        return False
    except Exception as exc:
        print(f"❌ Failed to upload binary for {binary.pkgname} ({binary.binary_name}): {exc}")
        return True


def get_headers() -> dict:
    """
    Build the HTTP headers used for every call to the fuzzing API.

    The bearer token is read from vault. The vault command is run without a shell and its
    exit status is checked, so a failing vault call is reported as an error rather than
    producing a header with an empty token.

    Returns:
        A dict with the ``Authorization`` and ``Content-Type`` headers.

    Raises:
        FileNotFoundError: If the vault executable cannot be found.
        subprocess.CalledProcessError: If vault exits with a non-zero status.
        RuntimeError: If vault succeeds but prints an empty token.
    """
    token_path = "identity/oidc/token/security-fuzzing-platform"
    # stderr is not captured, so vault's own error messages still show up in the job log.
    completed = subprocess.run(
        [VAULT_PATH, "read", "-field=token", token_path],
        check=True,
        stdout=subprocess.PIPE,
        text=True,
    )
    auth_header = completed.stdout.strip()
    if not auth_header:
        raise RuntimeError(f"vault returned an empty token for {token_path}")
    return {"Authorization": f"Bearer {auth_header}", "Content-Type": "application/json"}


# Script entry point: run the whole build/upload flow and turn any error into exit status 1.
if __name__ == "__main__":
    print("🚀 Starting fuzzing infrastructure setup...")
    try:
        build_and_upload_fuzz()
        print("✅ Fuzzing infrastructure setup completed successfully!")
    except Exception as exc:
        print(f"❌ Failed to set up fuzzing infrastructure: {exc}")
        raise SystemExit(1)
15 changes: 15 additions & 0 deletions ddtrace/internal/datadog/profiling/stack/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -151,3 +151,18 @@ if(BUILD_TESTING)
enable_testing()
add_subdirectory(test)
endif()

# Fuzzing harnesses (off by default).
# option() only supplies the default value: a value passed on the command line
# (-DBUILD_FUZZING=ON or -DBUILD_FUZZING=OFF) is kept as given.
option(BUILD_FUZZING "Build fuzzing harnesses for stack_v2/echion" OFF)

if(BUILD_FUZZING)
    add_subdirectory(fuzz)
endif()
13 changes: 13 additions & 0 deletions ddtrace/internal/datadog/profiling/stack/echion/echion/vm.h
Original file line number Diff line number Diff line change
Expand Up @@ -162,8 +162,21 @@ init_safe_copy()
*
* @return zero on success, otherwise non-zero.
*/
#if defined(ECHION_FUZZING)
// Fuzzing build: copy_memory() is routed to a hook supplied by the fuzzing harness, so the
// harness controls the copy_memory behavior and can simulate "garbage" reads.
// The hook has C linkage and is only declared here, not defined in this header.
extern "C" int
echion_fuzz_copy_memory(proc_ref_t proc_ref, const void* addr, ssize_t len, void* buf);

// Inline forwarder: passes its arguments to the harness hook unchanged and returns its result.
inline int
copy_memory(proc_ref_t proc_ref, const void* addr, ssize_t len, void* buf)
{
    return echion_fuzz_copy_memory(proc_ref, addr, len, buf);
}
#else
// Regular build: only the declaration lives here, the implementation is in vm.cc
int
copy_memory(proc_ref_t proc_ref, const void* addr, ssize_t len, void* buf);
#endif

inline pid_t pid = 0;

Expand Down
Loading