diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml index 6c110da45..6d844da98 100644 --- a/.github/workflows/integration-tests.yml +++ b/.github/workflows/integration-tests.yml @@ -586,9 +586,11 @@ jobs: env: # SCCACHE_GHA_ENABLED: "on" ROCM_PATH: "/opt/rocm" + RANDOMIZE_READDIR_LOG: "/tmp/readdir.log" steps: - - uses: actions/checkout@v4 + - name: Clone repository + uses: actions/checkout@v4 # I don't want to break the cache during testing. Will turn on after I # make sure it's working. @@ -606,6 +608,24 @@ jobs: # script: | # core.exportVariable('ROCM_PATH', process.env.ROCM_PATH || ''); + - name: Install dependencies + shell: bash + run: | + ## Install dependencies + sudo apt-get update + sudo apt-get install -y cmake curl + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- --default-toolchain none -y + echo "$HOME/.cargo/bin" >> $GITHUB_PATH + + - name: Install rust + uses: ./.github/actions/rust-toolchain + with: + toolchain: "stable" + + - name: Build & setup librandomize_readdir + run: | + cargo build -p randomize_readdir + - uses: actions/download-artifact@v4 with: name: integration-tests @@ -613,37 +633,44 @@ jobs: - name: Chmod for binary run: chmod +x ${SCCACHE_PATH} - - name: Install dependencies - shell: bash - run: | - ## Install dependencies - sudo apt-get update - sudo apt-get install -y cmake - # Ensure that HIPCC isn't already borken - name: Sanity Check run: | + export LD_PRELOAD=$PWD/target/debug/librandomize_readdir.so hipcc -o vectoradd_hip --offload-arch=gfx900 tests/cmake-hip/vectoradd_hip.cpp - name: Test run: | + export LD_PRELOAD=$PWD/target/debug/librandomize_readdir.so + rm "$RANDOMIZE_READDIR_LOG".* cmake -B build -S tests/cmake-hip -DCMAKE_HIP_COMPILER_LAUNCHER=${SCCACHE_PATH} -DCMAKE_HIP_ARCHITECTURES=gfx900 cmake --build build + if ! grep -q bitcode "$RANDOMIZE_READDIR_LOG".*; then + echo "amdgcn bitcode not accessed, is librandomize_readdir properly set up?" 
+ exit 1 + fi - name: Output run: | + export LD_PRELOAD=$PWD/target/debug/librandomize_readdir.so ${SCCACHE_PATH} --show-stats - name: Test Twice for Cache Read run: | + export LD_PRELOAD=$PWD/target/debug/librandomize_readdir.so + rm "$RANDOMIZE_READDIR_LOG".* rm -rf build cmake -B build -S tests/cmake-hip -DCMAKE_HIP_COMPILER_LAUNCHER=${SCCACHE_PATH} -DCMAKE_HIP_ARCHITECTURES=gfx900 cmake --build build + if ! grep -q bitcode "$RANDOMIZE_READDIR_LOG".*; then + echo "amdgcn bitcode not accessed, is librandomize_readdir properly set up?" + exit 1 + fi - name: Output run: | + export LD_PRELOAD=$PWD/target/debug/librandomize_readdir.so ${SCCACHE_PATH} --show-stats - ${SCCACHE_PATH} --show-stats | grep -e "Cache hits\s*[1-9]" gcc: diff --git a/Cargo.lock b/Cargo.lock index ad215de36..d109845b1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -577,6 +577,16 @@ dependencies = [ "typenum", ] +[[package]] +name = "ctor" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a2785755761f3ddc1492979ce1e48d2c00d09311c39e4466429188f3dd6501" +dependencies = [ + "quote", + "syn 2.0.48", +] + [[package]] name = "daemonize" version = "0.5.0" @@ -2117,6 +2127,18 @@ dependencies = [ "getrandom", ] +[[package]] +name = "randomize_readdir" +version = "0.1.0" +dependencies = [ + "ctor", + "libc", + "log", + "once_cell", + "rand", + "simplelog", +] + [[package]] name = "redis" version = "0.27.5" @@ -2837,6 +2859,17 @@ dependencies = [ "time", ] +[[package]] +name = "simplelog" +version = "0.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16257adbfaef1ee58b1363bdc0664c9b8e1e30aed86049635fb5f147d065a9c0" +dependencies = [ + "log", + "termcolor", + "time", +] + [[package]] name = "slab" version = "0.4.9" diff --git a/Cargo.toml b/Cargo.toml index c3f23d6f4..5595dd966 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -205,3 +205,4 @@ dist-tests = ["dist-client", "dist-server"] [workspace] exclude = ["tests/test-crate"] 
+members = ["tests/randomize_readdir"] diff --git a/src/compiler/c.rs b/src/compiler/c.rs index 9207e8d1a..50cc6b85d 100644 --- a/src/compiler/c.rs +++ b/src/compiler/c.rs @@ -285,10 +285,13 @@ where .read_dir() .ok() .map(|f| { - f.flatten() + let mut device_libs = f + .flatten() .filter(|f| f.path().extension().is_some_and(|ext| ext == "bc")) .map(|f| f.path()) - .collect() + .collect::>(); + device_libs.sort_unstable(); + device_libs }) .unwrap_or_default() } diff --git a/tests/randomize_readdir/Cargo.toml b/tests/randomize_readdir/Cargo.toml new file mode 100644 index 000000000..08016e0ac --- /dev/null +++ b/tests/randomize_readdir/Cargo.toml @@ -0,0 +1,15 @@ +[package] +edition = "2021" +name = "randomize_readdir" +version = "0.1.0" + +[dependencies] +ctor = "0.2" +libc = "0.2.99" +log = "0.4" +once_cell = "1" +rand = "0.8" +simplelog = "0.12" + +[lib] +crate-type = ["cdylib"] diff --git a/tests/randomize_readdir/src/lib.rs b/tests/randomize_readdir/src/lib.rs new file mode 100644 index 000000000..dae1cd4b6 --- /dev/null +++ b/tests/randomize_readdir/src/lib.rs @@ -0,0 +1,272 @@ +// Copyright 2024 Mozilla Foundation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! This library implements a shim that randomizes the results of readdir +//! and readdir64 for testing purposes. This is done by overriding the +//! posix calls associated with reading directories; opendir, fdopendir, +//! readdir, readdir64, and closedir. +//! +//! 
When readdir or readdir64 is first invoked, the shim will read the +//! entire directory into a vector, shuffle it, and store iteration +//! state inside a custom DirentIterator structure. Note that we +//! assume that no new entries will be added to the directory while +//! iterating, to keep things simple. Also keep in mind that calls to +//! any of the directory reading operations can come from different +//! threads, so the library state has to be kept in thread-safe types +//! where appropriate. +//! +//! Calls are dispatched to the "real" implementation in libc by using +//! dlsym with RTLD_NEXT. Unfortunately it seems that the usual libraries +//! for this, such as libloading, do not support RTLD_NEXT, so these +//! functions are just invoked using unsafe calls. +//! +//! To use this library, set LD_PRELOAD=path/to/librandomize_readdir.so. +//! You can verify that the output is random by running for example +//! `LD_PRELOAD=path/to/librandomize_readdir.so ls -U`. +//! +//! To test sccache with librandomize_readdir, export LD_PRELOAD in +//! the integration test and then check that the second invocation +//! hits the cache. If not, something inside sccache relies implicitly +//! on the order in which files are returned from the filesystem; that +//! order is not defined, so depending on it is not ideal. 
+ +use ctor::ctor; +use libc::{c_char, c_int, c_void, dirent, dirent64, dlsym, DIR, RTLD_NEXT}; +use log::{error, info}; +use once_cell::sync::OnceCell; +use rand::seq::SliceRandom; +use rand::thread_rng; +use simplelog::{Config, LevelFilter, WriteLogger}; +use std::collections::HashMap; +use std::env; +use std::ffi::CStr; +use std::fs::File; +use std::process; +use std::sync::RwLock; + +type Opendir = unsafe extern "C" fn(dirname: *const c_char) -> *mut DIR; +type Fdopendir = unsafe extern "C" fn(fd: c_int) -> *mut DIR; +type Readdir = unsafe extern "C" fn(dirp: *mut DIR) -> *mut dirent; +type Readdir64 = unsafe extern "C" fn(dirp: *mut DIR) -> *mut dirent64; +type Closedir = unsafe extern "C" fn(dirp: *mut DIR) -> c_int; + +struct DirentIterator { + entries: Vec, + index: usize, +} + +impl Iterator for DirentIterator { + type Item = *mut Dirent; + + fn next(&mut self) -> Option { + if self.index >= self.entries.len() { + return None; + } + + let ptr = &mut self.entries[self.index]; + self.index += 1; + Some(ptr) + } +} + +struct ReaddirState { + iter: Option>, + iter64: Option>, +} + +struct State { + opendir: Opendir, + fdopendir: Fdopendir, + readdir: Readdir, + readdir64: Readdir64, + closedir: Closedir, + + dirs: RwLock>, +} + +impl State { + fn new_opendir(&self, dirp: *mut DIR) { + self.dirs.write().expect("lock poisoned").insert( + dirp as usize, + ReaddirState { + iter: None, + iter64: None, + }, + ); + } + + fn wrapped_readdir_inner( + &self, + dirp: *mut DIR, + get_iter: GetIter, + readdir: Readdir, + ) -> *mut Dirent + where + Dirent: Copy, + GetIter: FnOnce(&mut ReaddirState) -> &mut Option>, + Readdir: Fn() -> *mut Dirent, + { + self.dirs + .write() + .expect("lock poisoned") + .get_mut(&(dirp as usize)) + .map(|dirstate| { + let iter = get_iter(dirstate); + if iter.is_none() { + let mut entries = Vec::new(); + + loop { + let entry = readdir(); + if entry.is_null() { + break; + } + + entries.push(unsafe { *entry }); + } + + entries.shuffle(&mut 
thread_rng()); + + *iter = Some(DirentIterator { entries, index: 0 }) + } + + let iter = iter.as_mut().unwrap(); + info!( + "{:p}: reading entry {}/{}", + dirp, + iter.index, + iter.entries.len() + ); + iter.next() + }) + .flatten() + .unwrap_or(std::ptr::null_mut()) + } + + fn wrapped_readdir(&self, dirp: *mut DIR) -> *mut dirent { + self.wrapped_readdir_inner( + dirp, + |dirstate| &mut dirstate.iter, + || unsafe { (self.readdir)(dirp) }, + ) + } + + fn wrapped_readdir64(&self, dirp: *mut DIR) -> *mut dirent64 { + self.wrapped_readdir_inner( + dirp, + |dirstate| &mut dirstate.iter64, + || unsafe { (self.readdir64)(dirp) }, + ) + } +} + +static STATE: OnceCell = OnceCell::new(); + +fn load_next(name: &[u8]) -> Prototype { + unsafe { + let name = CStr::from_bytes_with_nul(name).expect("invalid c-string literal"); + let sym = dlsym(RTLD_NEXT, name.as_ptr()); + if sym.is_null() { + error!("failed to load libc function {:?}", name.to_string_lossy()); + panic!("failed to load libc function pointer"); + } + + *(&sym as *const *mut c_void as *const Prototype) + } +} + +#[ctor] +fn init() { + if let Ok(path) = env::var("RANDOMIZE_READDIR_LOG") { + let path = format!("{}.{}", path, process::id()); + WriteLogger::init( + LevelFilter::Info, + Config::default(), + File::create(path).expect("failed to create log file"), + ) + .expect("failed to initialize logger"); + } + + // Force loading on module init. 
+ let opendir = load_next::(b"opendir\0"); + let fdopendir = load_next::(b"fdopendir\0"); + let readdir = load_next::(b"readdir\0"); + let readdir64 = load_next::(b"readdir64\0"); + let closedir = load_next::(b"closedir\0"); + + _ = STATE.get_or_init(|| State { + opendir, + fdopendir, + readdir, + readdir64, + closedir, + dirs: RwLock::new(HashMap::new()), + }); +} + +#[no_mangle] +pub extern "C" fn opendir(dirname: *const c_char) -> *mut DIR { + let state = STATE.wait(); + let dirp = unsafe { (state.opendir)(dirname) }; + + info!( + "{:p}: opening directory '{}'", + dirp, + unsafe { CStr::from_ptr(dirname) }.to_string_lossy() + ); + + if !dirp.is_null() { + state.new_opendir(dirp); + } + + dirp +} + +#[no_mangle] +pub extern "C" fn fdopendir(dirfd: c_int) -> *mut DIR { + let state = STATE.wait(); + let dirp = unsafe { (state.fdopendir)(dirfd) }; + + info!("{:p}: opening directory fd {}", dirp, dirfd); + + if !dirp.is_null() { + state.new_opendir(dirp); + } + + dirp +} + +#[no_mangle] +pub extern "C" fn readdir(dirp: *mut DIR) -> *mut dirent { + STATE.wait().wrapped_readdir(dirp) +} + +#[no_mangle] +pub extern "C" fn readdir64(dirp: *mut DIR) -> *mut dirent64 { + STATE.wait().wrapped_readdir64(dirp) +} + +#[no_mangle] +pub extern "C" fn closedir(dirp: *mut DIR) -> c_int { + info!("{:p}: closing handle", dirp); + + let state = STATE.wait(); + + state + .dirs + .write() + .expect("lock poisoned") + .remove(&(dirp as usize)); + + unsafe { (state.closedir)(dirp) } +}