diff --git a/Justfile b/Justfile index 6882a72a..18e6e174 100644 --- a/Justfile +++ b/Justfile @@ -57,7 +57,7 @@ compile-tests: clean: clean-cli clean-lib clean-tests -lint: lint-py lint-cli lint-lib +lint: lint-py lint-cli lint-lib lint-nix compile: compile-cli compile-lib compile-tests diff --git a/cli-wrapper/cli/src/record.rs b/cli-wrapper/cli/src/record.rs index 14c79044..f9d4a7d2 100644 --- a/cli-wrapper/cli/src/record.rs +++ b/cli-wrapper/cli/src/record.rs @@ -120,7 +120,10 @@ impl Recorder { pub fn record(self) -> Result<(ExitStatus, tempfile::TempDir)> { // reading and canonicalizing path to libprobe let libprobe_path = fs::canonicalize(match std::env::var_os("PROBE_LIB") { - Some(x) => PathBuf::from(x), + Some(x) => { + log::debug!("Resolved PROBE_LIB to {}", x.to_string_lossy()); + PathBuf::from(x) + } None => return Err(eyre!("couldn't find libprobe, are you using the wrapper?")), }) .wrap_err("unable to canonicalize libprobe path")? diff --git a/cli-wrapper/lib/build.rs b/cli-wrapper/lib/build.rs index 779f98f0..b9ac51ba 100644 --- a/cli-wrapper/lib/build.rs +++ b/cli-wrapper/lib/build.rs @@ -7,7 +7,7 @@ use bindgen::callbacks::ParseCallbacks; fn find_in_cpath(name: &str) -> Result { let cpath = env::var("CPATH").map_err(|_| { - "CPATH needs to be set (in unicode) so I can find include header files".to_owned() + "CPATH needs to be set (to valid UTF-8) so I can find include header files".to_owned() })?; Ok(cpath .split(':') diff --git a/flake.nix b/flake.nix index c476394e..f4900d04 100644 --- a/flake.nix +++ b/flake.nix @@ -255,14 +255,15 @@ pypkgs.numpy pypkgs.tqdm pypkgs.types-tqdm + pypkgs.xdg-base-dirs # probe_py.manual "dev time" requirements + pypkgs.types-tqdm pypkgs.psutil pypkgs.pytest pypkgs.pytest-timeout pypkgs.mypy pypkgs.ipython - pypkgs.xdg-base-dirs # libprobe build time requirement pypkgs.pycparser @@ -297,9 +298,7 @@ ++ pkgs.lib.lists.optional (system != "i686-linux" && system != "armv7l-linux") pkgs.nextflow ++ pkgs.lib.lists.optional (system != "i686-linux" && system != "armv7l-linux") pkgs.jdk23_headless # gdb broken on apple silicon - ++ pkgs.lib.lists.optional (system != "aarch64-darwin") pkgs.gdb - # while xdot isn't marked as linux only, it has a dependency (xvfb-run) that is - ++ pkgs.lib.lists.optional (builtins.elem system pkgs.lib.platforms.linux) pkgs.xdot; + ++ pkgs.lib.lists.optional (system != "aarch64-darwin") pkgs.gdb; }; }; } diff --git a/libprobe/generator/gen_libc_hooks.py b/libprobe/generator/gen_libc_hooks.py index fb7ebde9..bde7f61e 100755 --- a/libprobe/generator/gen_libc_hooks.py +++ b/libprobe/generator/gen_libc_hooks.py @@ -557,8 +557,8 @@ def wrapper_func_body(func: ParsedFunc) -> typing.Sequence[Node]: struct rusage; struct stat; struct statx; -struct utimbuf; struct timeval; +struct utimbuf; /* * There is some bug with pycparser unable to parse inline function pointers. diff --git a/libprobe/generator/libc_hooks_source.c b/libprobe/generator/libc_hooks_source.c index 3391fcd6..4744e7d3 100644 --- a/libprobe/generator/libc_hooks_source.c +++ b/libprobe/generator/libc_hooks_source.c @@ -3083,26 +3083,9 @@ int pthread_cancel(pthread_t thread) { void* mmap(void* addr, size_t length, int prot, int flags, int fd, off_t offset) {} int munmap(void* addr, size_t length) { } -/* - * Don't need to interpose exit because we have a destructor associated with pthread_create_key(...). - * The destructor will more likely always be fired, whereas exit(...) could not be hit (e.g., return from main() or not using libc for some weird reason). void exit (int status) { - void* pre_call = ({ - struct Op op = { - exit_op_code, - {.exit = {.status = status}}, - {0}, - 0, - 0, - }; - prov_log_try(op); - prov_log_record(op); - }); bool noreturn = true; } -fn _exit = exit; -fn _Exit = exit; -*/ int pipe(int pipefd[2]) { void* call = ({ diff --git a/libprobe/include/libprobe/prov_ops.h b/libprobe/include/libprobe/prov_ops.h index f70c512e..db66acd3 100644 --- a/libprobe/include/libprobe/prov_ops.h +++ b/libprobe/include/libprobe/prov_ops.h @@ -63,9 +63,9 @@ struct InitExecEpochOp { struct Path exe; char* const* argv; char* const* env; - struct Path stdin; - struct Path stdout; - struct Path stderr; + struct Path std_in; + struct Path std_out; + struct Path std_err; }; struct InitThreadOp { diff --git a/libprobe/src/debug_logging.h b/libprobe/src/debug_logging.h index 4a4f1578..5a83d422 100644 --- a/libprobe/src/debug_logging.h +++ b/libprobe/src/debug_logging.h @@ -2,11 +2,12 @@ #define _GNU_SOURCE -#include "global_state.h" // for get_exec_epoch_safe, get_pid_safe, get_tid... -#include // for errno -#include // for fprintf, stderr -#include // for exit, free -#include // for strerror, strndup +#include "../generated/libc_hooks.h" // IWYU pragma: keep for unwrapped_exit +#include "global_state.h" // for get_exec_epoch_safe, get_pid_safe, get_tid... +#include // for errno +#include // for fprintf, stderr +#include // for exit, free +#include // for strerror, strndup #ifndef NDEBUG #define DEBUG_LOG 1 @@ -29,12 +30,13 @@ #define WARNING(str, ...) LOG("WARNING " str " (errno=%d)", ##__VA_ARGS__, errno) +/* TODO: replace assert with ASSERTF because ASSERTF calls unwrapped_exit() */ #define ERROR(str, ...) \ ({ \ - char* errno_str = strndup(strerror(errno), 4096); \ - LOG("ERROR " str " (errno=%d %s)", ##__VA_ARGS__, errno, errno_str); \ - free(errno_str); \ - exit(1); \ + LOG("ERROR " str " (errno=%d)", ##__VA_ARGS__, errno); \ + /* TODO: check if unwrapped_exit == NULL and if so warn and syscall diectly */ \ + unwrapped_exit(103); \ + __builtin_unreachable(); \ }) /* TODO: Replace EXPECT, ASSERTF, NOT_IMPLEMENTED with explicit error handling: { ERR(...); return -1; } */ diff --git a/libprobe/src/env.c b/libprobe/src/env.c index efeb73b6..29055387 100644 --- a/libprobe/src/env.c +++ b/libprobe/src/env.c @@ -13,14 +13,6 @@ #include "env.h" -extern char** environ; - -void printenv() { - for (char** arg = environ; *arg; ++arg) { - DEBUG("printenv: %s", *arg); - } -} - const char* getenv_copy(const char* name) { /* Validate input */ char* val = getenv(name); diff --git a/libprobe/src/global_state.c b/libprobe/src/global_state.c index cf4e04a8..6edb6d8e 100644 --- a/libprobe/src/global_state.c +++ b/libprobe/src/global_state.c @@ -5,7 +5,6 @@ #include // IWYU pragma: keep for PATH_MAX #include // for pthread_mutex_t #include // for true, bool, false -#include // for stderr, stdin, stdout #include // for free #include // for memcpy, NULL, size_t, strnlen// for memcpy, NULL, size_t, strnlen #include // for mmap, PROT_*, MAP_* @@ -161,7 +160,7 @@ static inline void init_process_context() { /* We increment the epoch here, so if there is an exec later on, the epoch is already incremented when they see it. */ __process_context->epoch_no += 1; DEBUG("__process_context = %p {.epoch = %d, pid_arena_path = %s}", __process_context, - __process_context->epoch_no, __process_context->pid_arena_path); + __process_context->epoch_no, __process_context->pid_arena_path.bytes); } void uninit_process_context() { /* TODO: */ @@ -242,18 +241,18 @@ static inline void init_paths(struct ThreadState* state) { pid_t pid = get_pid(); size_t exec_epoch = get_exec_epoch(); state->ops_path.len = - CHECK_SNPRINTF(state->ops_path.bytes, PATH_MAX, "%s/" PIDS_SUBDIR "/%d/%d/%d", + CHECK_SNPRINTF(state->ops_path.bytes, PATH_MAX, "%s/" PIDS_SUBDIR "/%d/%zu/%d", probe_dir->bytes, pid, exec_epoch, state->tid); check_fixed_path((&state->ops_path)); checked_mkdir(state->ops_path.bytes); state->ops_path.len = CHECK_SNPRINTF(state->ops_path.bytes, PATH_MAX, - "%s/" PIDS_SUBDIR "/%d/%d/%d/" OPS_SUBDIR "/", + "%s/" PIDS_SUBDIR "/%d/%zu/%d/" OPS_SUBDIR "/", probe_dir->bytes, pid, exec_epoch, state->tid); check_fixed_path((&state->ops_path)); state->data_path.len = CHECK_SNPRINTF(state->data_path.bytes, PATH_MAX, - "%s/" PIDS_SUBDIR "/%d/%d/%d/" DATA_SUBDIR "/", + "%s/" PIDS_SUBDIR "/%d/%zu/%d/" DATA_SUBDIR "/", probe_dir->bytes, pid, exec_epoch, state->tid); check_fixed_path((&state->data_path)); } @@ -367,10 +366,9 @@ static inline void emit_init_epoch_op() { .exe = create_path_lazy(AT_FDCWD, exe.bytes, 0), .argv = arena_copy_argv(get_data_arena(), argv, argc), .env = arena_copy_argv(get_data_arena(), env, envc), - // TODO: See if stdin, stderr, stdout are really necessary - .stdin = create_path_lazy(AT_FDCWD, "/dev/stdin", 0), - .stdout = create_path_lazy(AT_FDCWD, "/dev/stdout", 0), - .stderr = create_path_lazy(AT_FDCWD, "/dev/stderr", 0), + .std_in = create_path_lazy(AT_FDCWD, "/dev/stdin", 0), + .std_out = create_path_lazy(AT_FDCWD, "/dev/stdout", 0), + .std_err = create_path_lazy(AT_FDCWD, "/dev/stderr", 0), }}, {0}, 0, diff --git a/libprobe/src/lookup_on_path.c b/libprobe/src/lookup_on_path.c index f1083378..f8eb781f 100644 --- a/libprobe/src/lookup_on_path.c +++ b/libprobe/src/lookup_on_path.c @@ -4,18 +4,18 @@ #include // for AT_FDCWD #include // for bool, false, true -#include // for getenv, size_t #include // for strlen #include // for X_OK #include "../generated/libc_hooks.h" // for unwrapped_faccessat #include "debug_logging.h" // for DEBUG +#include "env.h" // for getenv_copy #include "global_state.h" // for get_default_path #include "util.h" // for BORROWED, path_join bool lookup_on_path(BORROWED const char* bin_name, BORROWED char* bin_path) { size_t bin_name_length = strlen(bin_name); - char* env_path = getenv("PATH"); + const char* env_path = getenv_copy("PATH"); /* * If this variable isn't defined, the path list defaults diff --git a/probe_py/probe_py/cli.py b/probe_py/probe_py/cli.py index 908fa51d..67c9afc5 100644 --- a/probe_py/probe_py/cli.py +++ b/probe_py/probe_py/cli.py @@ -75,6 +75,7 @@ class OpType(enum.StrEnum): ALL = enum.auto() MINIMAL = enum.auto() FILE = enum.auto() + SUCCESSFUL = enum.auto() @export_app.command() @@ -87,10 +88,14 @@ def hb_graph( pathlib.Path, typer.Argument(help="output file written by `probe record -o $file`."), ] = pathlib.Path("probe_log"), - retain_ops: Annotated[ + retain: Annotated[ OpType, - typer.Option(help="Which ops to include in the graph?") + typer.Option(help="Which ops to include in the graph? There are quite a few.") ] = OpType.MINIMAL, + show_op_number: Annotated[ + bool, + typer.Option(help="Whether to show the op number in the output.") + ] = False, ) -> None: """ Write a happens-before graph on the operations in probe_log. @@ -103,14 +108,16 @@ def hb_graph( """ probe_log = parser.parse_probe_log(path_to_probe_log) hbg = hb_graph_module.probe_log_to_hb_graph(probe_log) - match retain_ops: + match retain: case OpType.ALL: pass case OpType.MINIMAL: - hbg = hb_graph_module.retain_only(probe_log, hbg, lambda _node, _op: False) + hbg = hb_graph_module.retain_only(probe_log, hbg, lambda _node, op: isinstance(op.data, ops.InitExecEpochOp)) case OpType.FILE: hbg = hb_graph_module.retain_only(probe_log, hbg, lambda node, op: isinstance(op.data, (ops.OpenOp, ops.CloseOp, ops.DupOp, ops.ExecOp))) - hb_graph_module.label_nodes(probe_log, hbg, retain_ops == OpType.ALL) + case OpType.SUCCESSFUL: + hbg = hb_graph_module.retain_only(probe_log, hbg, lambda node, op: getattr(op.data, "ferrno", 0) == 0 and not isinstance(op.data, ops.ReaddirOp)) + hb_graph_module.label_nodes(probe_log, hbg, show_op_number) graph_utils.serialize_graph(hbg, output) diff --git a/probe_py/probe_py/hb_graph_accesses.py b/probe_py/probe_py/hb_graph_accesses.py index 4043b72e..062519c0 100644 --- a/probe_py/probe_py/hb_graph_accesses.py +++ b/probe_py/probe_py/hb_graph_accesses.py @@ -109,9 +109,9 @@ def openfd( match op_data: case ops.InitExecEpochOp(): if node.exec_no == ptypes.initial_exec_no and node.pid == root_pid: - yield from openfd(0, AccessMode.READ, False, node, op_data.stdin) - yield from openfd(1, AccessMode.TRUNCATE_WRITE, False, node, op_data.stdout) - yield from openfd(2, AccessMode.TRUNCATE_WRITE, False, node, op_data.stderr) + yield from openfd(0, AccessMode.READ, False, node, op_data.std_in) + yield from openfd(1, AccessMode.TRUNCATE_WRITE, False, node, op_data.std_out) + yield from openfd(2, AccessMode.TRUNCATE_WRITE, False, node, op_data.std_err) case ops.OpenOp(): mode = AccessMode.from_open_flags(op_data.flags) cloexec = bool(op_data.flags & os.O_CLOEXEC) diff --git a/probe_py/probe_py/ptypes.py b/probe_py/probe_py/ptypes.py index 3c2766b6..08463a0d 100644 --- a/probe_py/probe_py/ptypes.py +++ b/probe_py/probe_py/ptypes.py @@ -7,6 +7,7 @@ import pathlib import random import socket +import stat import typing import numpy from . import ops @@ -82,8 +83,18 @@ class Inode: host: Host device: Device number: int + mode: int + + @property + def type(self) -> str: + return stat.filemode(self.mode)[0] + + @property + def is_fifo(self) -> bool: + return stat.S_ISFIFO(self.mode) + def __str__(self) -> str: - return f"inode {self.number} on {self.device} @{self.host.name}" + return f"inode {self.type.upper()} {self.number} on {self.device} @{self.host.name}" @dataclasses.dataclass(frozen=True) @@ -114,6 +125,7 @@ def from_local_path(path: pathlib.Path) -> InodeVersion: os.minor(s.st_dev), ), s.st_ino, + s.st_mode, ), numpy.datetime64(s.st_mtime_ns, "ns"), s.st_size, @@ -126,6 +138,7 @@ def from_probe_path(path: ops.Path) -> InodeVersion: Host.localhost(), Device(path.device_major, path.device_minor), path.inode, + path.mode, ), numpy.datetime64(path.mtime.sec * int(1e9) + path.mtime.nsec, "ns"), path.size, @@ -144,6 +157,7 @@ def from_id_string(id_string: str) -> InodeVersion: Host.localhost(), Device(array[0], array[1]), array[2], + 0, ), numpy.datetime64(array[3] * int(1e9) + array[4], "ns"), array[5], diff --git a/tests/examples/.gitignore b/tests/examples/.gitignore index 367d1142..6d955428 100644 --- a/tests/examples/.gitignore +++ b/tests/examples/.gitignore @@ -1,2 +1,3 @@ *.exe *.so +*.o diff --git a/tests/examples/Makefile b/tests/examples/Makefile index 59336412..b22ed295 100644 --- a/tests/examples/Makefile +++ b/tests/examples/Makefile @@ -7,7 +7,7 @@ # If not, then PROBE does not interfere with ASAN. CC ?= clang -CFLAGS ?= -Wall -Wextra -Werror -Og -g +CFLAGS ?= -Wall -Wextra -Werror -Og -g -Wno-unused-command-line-argument SOURCE_FILES := $(wildcard *.c) diff --git a/tests/examples/libprobe_without_wrapper.py b/tests/examples/libprobe_without_wrapper.py index abf7d6cc..7ecd1977 100755 --- a/tests/examples/libprobe_without_wrapper.py +++ b/tests/examples/libprobe_without_wrapper.py @@ -100,8 +100,9 @@ def new(libprobe_path: pathlib.Path, copy_files: CopyFilesMode) -> ProcessContex f"--eval-command=set environment LD_PRELOAD {libprobe!s}", f"--eval-command=set environment PROBE_DIR {probe_dir!s}", f"--eval-command=set environment LD_DEBUG {LD_DEBUG}", - "--eval-command=run", - "--eval-command=backtrace", + "--eval-command=set startup-with-shell off", + "--eval-command=starti", + # "--eval-command=backtrace", "--args", cmd, *args, diff --git a/tests/examples/mmap_cat.c b/tests/examples/mmap_cat.c index 55c4a957..2a6d7af8 100644 --- a/tests/examples/mmap_cat.c +++ b/tests/examples/mmap_cat.c @@ -30,7 +30,7 @@ int main (int argc, char **argv) { char* buffer = (char *) mmap(NULL, statx_result.stx_size, PROT_READ, MAP_SHARED, fd, 0); if (buffer == NULL || buffer == MAP_FAILED) { - fprintf(stderr, "Could not mmap fd=%d /* \"%s\" */, size=%lld\n", fd, argv[1], statx_result.stx_size); + fprintf(stderr, "Could not mmap fd=%d /* \"%s\" */, size=%lld\n", fd, argv[1], (unsigned long long)statx_result.stx_size); perror("mmap"); exit(1); } diff --git a/tests/test_record.py b/tests/test_record.py index 5116297c..66b195c5 100644 --- a/tests/test_record.py +++ b/tests/test_record.py @@ -167,9 +167,15 @@ def scratch_directory( @pytest.mark.parametrize("copy_files", [ "none", "lazily", - "eagerly", + # "eagerly", +]) +@pytest.mark.parametrize("debug", [ + False, + # True, +], ids=[ + "opt", + # "dbg", ]) -@pytest.mark.parametrize("debug", [False, True], ids=["opt", "dbg"]) @pytest.mark.parametrize( "command", {**simple_commands, **complex_commands}.values(),