Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Justfile
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ compile-tests:

clean: clean-cli clean-lib clean-tests

lint: lint-py lint-cli lint-lib
lint: lint-py lint-cli lint-lib lint-nix

compile: compile-cli compile-lib compile-tests

Expand Down
5 changes: 4 additions & 1 deletion cli-wrapper/cli/src/record.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,10 @@ impl Recorder {
pub fn record(self) -> Result<(ExitStatus, tempfile::TempDir)> {
// reading and canonicalizing path to libprobe
let libprobe_path = fs::canonicalize(match std::env::var_os("PROBE_LIB") {
Some(x) => PathBuf::from(x),
Some(x) => {
log::debug!("Resolved PROBE_LIB to {}", x.to_string_lossy());
PathBuf::from(x)
}
None => return Err(eyre!("couldn't find libprobe, are you using the wrapper?")),
})
.wrap_err("unable to canonicalize libprobe path")?
Expand Down
2 changes: 1 addition & 1 deletion cli-wrapper/lib/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use bindgen::callbacks::ParseCallbacks;

fn find_in_cpath(name: &str) -> Result<PathBuf, String> {
let cpath = env::var("CPATH").map_err(|_| {
"CPATH needs to be set (in unicode) so I can find include header files".to_owned()
"CPATH needs to be set (to valid UTF-8) so I can find include header files".to_owned()
})?;
Ok(cpath
.split(':')
Expand Down
7 changes: 3 additions & 4 deletions flake.nix
Original file line number Diff line number Diff line change
Expand Up @@ -255,14 +255,15 @@
pypkgs.numpy
pypkgs.tqdm
pypkgs.types-tqdm
pypkgs.xdg-base-dirs

# probe_py.manual "dev time" requirements
pypkgs.types-tqdm
pypkgs.psutil
pypkgs.pytest
pypkgs.pytest-timeout
pypkgs.mypy
pypkgs.ipython
pypkgs.xdg-base-dirs

# libprobe build time requirement
pypkgs.pycparser
Expand Down Expand Up @@ -297,9 +298,7 @@
++ pkgs.lib.lists.optional (system != "i686-linux" && system != "armv7l-linux") pkgs.nextflow
++ pkgs.lib.lists.optional (system != "i686-linux" && system != "armv7l-linux") pkgs.jdk23_headless
# gdb broken on apple silicon
++ pkgs.lib.lists.optional (system != "aarch64-darwin") pkgs.gdb
# while xdot isn't marked as linux only, it has a dependency (xvfb-run) that is
++ pkgs.lib.lists.optional (builtins.elem system pkgs.lib.platforms.linux) pkgs.xdot;
++ pkgs.lib.lists.optional (system != "aarch64-darwin") pkgs.gdb;
};
};
}
Expand Down
2 changes: 1 addition & 1 deletion libprobe/generator/gen_libc_hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -557,8 +557,8 @@ def wrapper_func_body(func: ParsedFunc) -> typing.Sequence[Node]:
struct rusage;
struct stat;
struct statx;
struct utimbuf;
struct timeval;
struct utimbuf;

/*
* There is some bug with pycparser unable to parse inline function pointers.
Expand Down
17 changes: 0 additions & 17 deletions libprobe/generator/libc_hooks_source.c
Original file line number Diff line number Diff line change
Expand Up @@ -3083,26 +3083,9 @@ int pthread_cancel(pthread_t thread) {
void* mmap(void* addr, size_t length, int prot, int flags, int fd, off_t offset) {}
int munmap(void* addr, size_t length) { }

/*
* Don't need to interpose exit because we have a destructor associated with pthread_create_key(...).
* The destructor will more likely always be fired, whereas exit(...) could not be hit (e.g., return from main() or not using libc for some weird reason).
void exit (int status) {
void* pre_call = ({
struct Op op = {
exit_op_code,
{.exit = {.status = status}},
{0},
0,
0,
};
prov_log_try(op);
prov_log_record(op);
});
bool noreturn = true;
}
fn _exit = exit;
fn _Exit = exit;
*/

int pipe(int pipefd[2]) {
void* call = ({
Expand Down
6 changes: 3 additions & 3 deletions libprobe/include/libprobe/prov_ops.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,9 @@ struct InitExecEpochOp {
struct Path exe;
char* const* argv;
char* const* env;
struct Path stdin;
struct Path stdout;
struct Path stderr;
struct Path std_in;
struct Path std_out;
struct Path std_err;
};

struct InitThreadOp {
Expand Down
20 changes: 11 additions & 9 deletions libprobe/src/debug_logging.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@

#define _GNU_SOURCE

#include "global_state.h" // for get_exec_epoch_safe, get_pid_safe, get_tid...
#include <errno.h> // for errno
#include <stdio.h> // for fprintf, stderr
#include <stdlib.h> // for exit, free
#include <string.h> // for strerror, strndup
#include "../generated/libc_hooks.h" // IWYU pragma: keep for unwrapped_exit
#include "global_state.h" // for get_exec_epoch_safe, get_pid_safe, get_tid...
#include <errno.h> // for errno
#include <stdio.h> // for fprintf, stderr
#include <stdlib.h> // for exit, free
#include <string.h> // for strerror, strndup

#ifndef NDEBUG
#define DEBUG_LOG 1
Expand All @@ -29,12 +30,13 @@

#define WARNING(str, ...) LOG("WARNING " str " (errno=%d)", ##__VA_ARGS__, errno)

/* TODO: replace assert with ASSERTF because ASSERTF calls unwrapped_exit() */
#define ERROR(str, ...) \
({ \
char* errno_str = strndup(strerror(errno), 4096); \
LOG("ERROR " str " (errno=%d %s)", ##__VA_ARGS__, errno, errno_str); \
free(errno_str); \
exit(1); \
LOG("ERROR " str " (errno=%d)", ##__VA_ARGS__, errno); \
/* TODO: check if unwrapped_exit == NULL and if so warn and syscall diectly */ \
unwrapped_exit(103); \
__builtin_unreachable(); \
})

/* TODO: Replace EXPECT, ASSERTF, NOT_IMPLEMENTED with explicit error handling: { ERR(...); return -1; } */
Expand Down
8 changes: 0 additions & 8 deletions libprobe/src/env.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,6 @@

#include "env.h"

extern char** environ;

void printenv() {
for (char** arg = environ; *arg; ++arg) {
DEBUG("printenv: %s", *arg);
}
}

const char* getenv_copy(const char* name) {
/* Validate input */
char* val = getenv(name);
Expand Down
16 changes: 7 additions & 9 deletions libprobe/src/global_state.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
#include <limits.h> // IWYU pragma: keep for PATH_MAX
#include <pthread.h> // for pthread_mutex_t
#include <stdbool.h> // for true, bool, false
#include <stdio.h> // for stderr, stdin, stdout
#include <stdlib.h> // for free
#include <string.h> // for memcpy, NULL, size_t, strnlen// for memcpy, NULL, size_t, strnlen
#include <sys/mman.h> // for mmap, PROT_*, MAP_*
Expand Down Expand Up @@ -161,7 +160,7 @@ static inline void init_process_context() {
/* We increment the epoch here, so if there is an exec later on, the epoch is already incremented when they see it. */
__process_context->epoch_no += 1;
DEBUG("__process_context = %p {.epoch = %d, pid_arena_path = %s}", __process_context,
__process_context->epoch_no, __process_context->pid_arena_path);
__process_context->epoch_no, __process_context->pid_arena_path.bytes);
}
void uninit_process_context() {
/* TODO: */
Expand Down Expand Up @@ -242,18 +241,18 @@ static inline void init_paths(struct ThreadState* state) {
pid_t pid = get_pid();
size_t exec_epoch = get_exec_epoch();
state->ops_path.len =
CHECK_SNPRINTF(state->ops_path.bytes, PATH_MAX, "%s/" PIDS_SUBDIR "/%d/%d/%d",
CHECK_SNPRINTF(state->ops_path.bytes, PATH_MAX, "%s/" PIDS_SUBDIR "/%d/%zu/%d",
probe_dir->bytes, pid, exec_epoch, state->tid);
check_fixed_path((&state->ops_path));
checked_mkdir(state->ops_path.bytes);

state->ops_path.len = CHECK_SNPRINTF(state->ops_path.bytes, PATH_MAX,
"%s/" PIDS_SUBDIR "/%d/%d/%d/" OPS_SUBDIR "/",
"%s/" PIDS_SUBDIR "/%d/%zu/%d/" OPS_SUBDIR "/",
probe_dir->bytes, pid, exec_epoch, state->tid);
check_fixed_path((&state->ops_path));

state->data_path.len = CHECK_SNPRINTF(state->data_path.bytes, PATH_MAX,
"%s/" PIDS_SUBDIR "/%d/%d/%d/" DATA_SUBDIR "/",
"%s/" PIDS_SUBDIR "/%d/%zu/%d/" DATA_SUBDIR "/",
probe_dir->bytes, pid, exec_epoch, state->tid);
check_fixed_path((&state->data_path));
}
Expand Down Expand Up @@ -367,10 +366,9 @@ static inline void emit_init_epoch_op() {
.exe = create_path_lazy(AT_FDCWD, exe.bytes, 0),
.argv = arena_copy_argv(get_data_arena(), argv, argc),
.env = arena_copy_argv(get_data_arena(), env, envc),
// TODO: See if stdin, stderr, stdout are really necessary
.stdin = create_path_lazy(AT_FDCWD, "/dev/stdin", 0),
.stdout = create_path_lazy(AT_FDCWD, "/dev/stdout", 0),
.stderr = create_path_lazy(AT_FDCWD, "/dev/stderr", 0),
.std_in = create_path_lazy(AT_FDCWD, "/dev/stdin", 0),
.std_out = create_path_lazy(AT_FDCWD, "/dev/stdout", 0),
.std_err = create_path_lazy(AT_FDCWD, "/dev/stderr", 0),
}},
{0},
0,
Expand Down
4 changes: 2 additions & 2 deletions libprobe/src/lookup_on_path.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,18 @@

#include <fcntl.h> // for AT_FDCWD
#include <stdbool.h> // for bool, false, true
#include <stdlib.h> // for getenv, size_t
#include <string.h> // for strlen
#include <unistd.h> // for X_OK

#include "../generated/libc_hooks.h" // for unwrapped_faccessat
#include "debug_logging.h" // for DEBUG
#include "env.h" // for getenv_copy
#include "global_state.h" // for get_default_path
#include "util.h" // for BORROWED, path_join

bool lookup_on_path(BORROWED const char* bin_name, BORROWED char* bin_path) {
size_t bin_name_length = strlen(bin_name);
char* env_path = getenv("PATH");
const char* env_path = getenv_copy("PATH");

/*
* If this variable isn't defined, the path list defaults
Expand Down
17 changes: 12 additions & 5 deletions probe_py/probe_py/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ class OpType(enum.StrEnum):
ALL = enum.auto()
MINIMAL = enum.auto()
FILE = enum.auto()
SUCCESSFUL = enum.auto()


@export_app.command()
Expand All @@ -87,10 +88,14 @@ def hb_graph(
pathlib.Path,
typer.Argument(help="output file written by `probe record -o $file`."),
] = pathlib.Path("probe_log"),
retain_ops: Annotated[
retain: Annotated[
OpType,
typer.Option(help="Which ops to include in the graph?")
typer.Option(help="Which ops to include in the graph? There are quite a few.")
] = OpType.MINIMAL,
show_op_number: Annotated[
bool,
typer.Option(help="Whether to show the op number in the output.")
] = False,
) -> None:
"""
Write a happens-before graph on the operations in probe_log.
Expand All @@ -103,14 +108,16 @@ def hb_graph(
"""
probe_log = parser.parse_probe_log(path_to_probe_log)
hbg = hb_graph_module.probe_log_to_hb_graph(probe_log)
match retain_ops:
match retain:
case OpType.ALL:
pass
case OpType.MINIMAL:
hbg = hb_graph_module.retain_only(probe_log, hbg, lambda _node, _op: False)
hbg = hb_graph_module.retain_only(probe_log, hbg, lambda _node, op: isinstance(op.data, ops.InitExecEpochOp))
case OpType.FILE:
hbg = hb_graph_module.retain_only(probe_log, hbg, lambda node, op: isinstance(op.data, (ops.OpenOp, ops.CloseOp, ops.DupOp, ops.ExecOp)))
hb_graph_module.label_nodes(probe_log, hbg, retain_ops == OpType.ALL)
case OpType.SUCCESSFUL:
hbg = hb_graph_module.retain_only(probe_log, hbg, lambda node, op: getattr(op.data, "ferrno", 0) == 0 and not isinstance(op.data, ops.ReaddirOp))
hb_graph_module.label_nodes(probe_log, hbg, show_op_number)
graph_utils.serialize_graph(hbg, output)


Expand Down
6 changes: 3 additions & 3 deletions probe_py/probe_py/hb_graph_accesses.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,9 +109,9 @@ def openfd(
match op_data:
case ops.InitExecEpochOp():
if node.exec_no == ptypes.initial_exec_no and node.pid == root_pid:
yield from openfd(0, AccessMode.READ, False, node, op_data.stdin)
yield from openfd(1, AccessMode.TRUNCATE_WRITE, False, node, op_data.stdout)
yield from openfd(2, AccessMode.TRUNCATE_WRITE, False, node, op_data.stderr)
yield from openfd(0, AccessMode.READ, False, node, op_data.std_in)
yield from openfd(1, AccessMode.TRUNCATE_WRITE, False, node, op_data.std_out)
yield from openfd(2, AccessMode.TRUNCATE_WRITE, False, node, op_data.std_err)
case ops.OpenOp():
mode = AccessMode.from_open_flags(op_data.flags)
cloexec = bool(op_data.flags & os.O_CLOEXEC)
Expand Down
16 changes: 15 additions & 1 deletion probe_py/probe_py/ptypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import pathlib
import random
import socket
import stat
import typing
import numpy
from . import ops
Expand Down Expand Up @@ -82,8 +83,18 @@ class Inode:
host: Host
device: Device
number: int
mode: int

@property
def type(self) -> str:
return stat.filemode(self.mode)[0]

@property
def is_fifo(self) -> bool:
return stat.S_ISFIFO(self.mode)

def __str__(self) -> str:
return f"inode {self.number} on {self.device} @{self.host.name}"
return f"inode {self.type.upper()} {self.number} on {self.device} @{self.host.name}"


@dataclasses.dataclass(frozen=True)
Expand Down Expand Up @@ -114,6 +125,7 @@ def from_local_path(path: pathlib.Path) -> InodeVersion:
os.minor(s.st_dev),
),
s.st_ino,
s.st_mode,
),
numpy.datetime64(s.st_mtime_ns, "ns"),
s.st_size,
Expand All @@ -126,6 +138,7 @@ def from_probe_path(path: ops.Path) -> InodeVersion:
Host.localhost(),
Device(path.device_major, path.device_minor),
path.inode,
path.mode,
),
numpy.datetime64(path.mtime.sec * int(1e9) + path.mtime.nsec, "ns"),
path.size,
Expand All @@ -144,6 +157,7 @@ def from_id_string(id_string: str) -> InodeVersion:
Host.localhost(),
Device(array[0], array[1]),
array[2],
0,
),
numpy.datetime64(array[3] * int(1e9) + array[4], "ns"),
array[5],
Expand Down
1 change: 1 addition & 0 deletions tests/examples/.gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
*.exe
*.so
*.o
2 changes: 1 addition & 1 deletion tests/examples/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
# If not, then PROBE does not interfere with ASAN.

CC ?= clang
CFLAGS ?= -Wall -Wextra -Werror -Og -g
CFLAGS ?= -Wall -Wextra -Werror -Og -g -Wno-unused-command-line-argument

SOURCE_FILES := $(wildcard *.c)

Expand Down
5 changes: 3 additions & 2 deletions tests/examples/libprobe_without_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,9 @@ def new(libprobe_path: pathlib.Path, copy_files: CopyFilesMode) -> ProcessContex
f"--eval-command=set environment LD_PRELOAD {libprobe!s}",
f"--eval-command=set environment PROBE_DIR {probe_dir!s}",
f"--eval-command=set environment LD_DEBUG {LD_DEBUG}",
"--eval-command=run",
"--eval-command=backtrace",
"--eval-command=set startup-with-shell off",
"--eval-command=starti",
# "--eval-command=backtrace",
"--args",
cmd,
*args,
Expand Down
2 changes: 1 addition & 1 deletion tests/examples/mmap_cat.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ int main (int argc, char **argv) {

char* buffer = (char *) mmap(NULL, statx_result.stx_size, PROT_READ, MAP_SHARED, fd, 0);
if (buffer == NULL || buffer == MAP_FAILED) {
fprintf(stderr, "Could not mmap fd=%d /* \"%s\" */, size=%lld\n", fd, argv[1], statx_result.stx_size);
fprintf(stderr, "Could not mmap fd=%d /* \"%s\" */, size=%lld\n", fd, argv[1], (unsigned long long)statx_result.stx_size);
perror("mmap");
exit(1);
}
Expand Down
10 changes: 8 additions & 2 deletions tests/test_record.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,9 +167,15 @@ def scratch_directory(
@pytest.mark.parametrize("copy_files", [
"none",
"lazily",
"eagerly",
# "eagerly",
])
@pytest.mark.parametrize("debug", [
False,
# True,
], ids=[
"opt",
# "dbg",
])
@pytest.mark.parametrize("debug", [False, True], ids=["opt", "dbg"])
@pytest.mark.parametrize(
"command",
{**simple_commands, **complex_commands}.values(),
Expand Down