Skip to content

Commit

Permalink
Implement the DT_GNU_HASH mechanism for vDSO parsing. (#1259)
Browse files Browse the repository at this point in the history
* Implement the `DT_GNU_HASH` mechanism for vDSO parsing.

Linux recently [removed] the `DT_HASH` section from the aarch64 vDSO. To
continue to be able to read vDSOs, implement symbol lookup via the
`DT_GNU_HASH` section, following the logic in [this patch].

[removed]: torvalds/linux@48f6430
[this patch]: https://lkml.org/lkml/2024/12/6/828

* Fix compilation on s390x with experimental asm.

* Fix pointer arithmetic on s390x.

* Add comments.

* Add more vDSO tests, and enable vDSO getcpu on s390x.
  • Loading branch information
sunfishcode authored Jan 13, 2025
1 parent 75ba0be commit 2268402
Show file tree
Hide file tree
Showing 3 changed files with 255 additions and 75 deletions.
6 changes: 4 additions & 2 deletions src/backend/linux_raw/process/syscalls.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ use {crate::backend::conv::slice_just_addr_mut, crate::process::Gid};
target_arch = "x86_64",
target_arch = "x86",
target_arch = "riscv64",
target_arch = "powerpc64"
target_arch = "powerpc64",
target_arch = "s390x"
))]
pub(crate) use crate::backend::vdso_wrappers::sched_getcpu;

Expand All @@ -50,7 +51,8 @@ pub(crate) use crate::backend::vdso_wrappers::sched_getcpu;
target_arch = "x86_64",
target_arch = "x86",
target_arch = "riscv64",
target_arch = "powerpc64"
target_arch = "powerpc64",
target_arch = "s390x"
)))]
#[inline]
pub(crate) fn sched_getcpu() -> usize {
Expand Down
291 changes: 229 additions & 62 deletions src/backend/linux_raw/vdso.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@
//! with Creative Commons Zero License, version 1.0,
//! available at <https://creativecommons.org/publicdomain/zero/1.0/legalcode>
//!
//! It also incorporates the patch at:
//! <https://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git/commit/tools/testing/selftests/vDSO?h=next&id=01587d80b04f29747b6fd6d766c3bfa632f14eb0>,
//! with changes to fix the pointer arithmetic on s390x.
//!
//! # Safety
//!
//! Parsing the vDSO involves a lot of raw pointer manipulation. This
Expand Down Expand Up @@ -34,6 +38,7 @@ pub(super) struct Vdso {
// Symbol table
symtab: *const Elf_Sym,
symstrings: *const u8,
gnu_hash: *const u32,
bucket: *const ElfHashEntry,
chain: *const ElfHashEntry,
nbucket: ElfHashEntry,
Expand All @@ -60,6 +65,16 @@ fn elf_hash(name: &CStr) -> u32 {
h
}

/// Compute the hash of `name` used for `DT_GNU_HASH` lookups.
///
/// Starting from the seed 5381, each byte of `name` (excluding the
/// trailing NUL) updates the accumulator as `h = h * 33 + byte`, with all
/// arithmetic wrapping at 32 bits.
fn gnu_hash(name: &CStr) -> u32 {
    name.to_bytes().iter().fold(5381_u32, |acc, &byte| {
        // `acc * 33` is the same value as `acc + acc * 32` modulo 2^32.
        acc.wrapping_mul(33).wrapping_add(u32::from(byte))
    })
}

/// Create a `Vdso` value by parsing the vDSO at the `sysinfo_ehdr` address.
fn init_from_sysinfo_ehdr() -> Option<Vdso> {
// SAFETY: The auxv initialization code does extensive checks to ensure
Expand All @@ -80,6 +95,7 @@ fn init_from_sysinfo_ehdr() -> Option<Vdso> {
pv_offset: 0,
symtab: null(),
symstrings: null(),
gnu_hash: null(),
bucket: null(),
chain: null(),
nbucket: 0,
Expand Down Expand Up @@ -159,6 +175,11 @@ fn init_from_sysinfo_ehdr() -> Option<Vdso> {
)?
.as_ptr();
}
DT_GNU_HASH => {
vdso.gnu_hash =
check_raw_pointer::<u32>(vdso.addr_from_elf(d.d_un.d_ptr)? as *mut _)?
.as_ptr()
}
DT_VERSYM => {
vdso.versym =
check_raw_pointer::<u16>(vdso.addr_from_elf(d.d_un.d_ptr)? as *mut _)?
Expand All @@ -183,7 +204,10 @@ fn init_from_sysinfo_ehdr() -> Option<Vdso> {
// `check_raw_pointer` will have checked these pointers for null,
// however they could still be null if the expected dynamic table
// entries are absent.
if vdso.symstrings.is_null() || vdso.symtab.is_null() || hash.is_null() {
if vdso.symstrings.is_null()
|| vdso.symtab.is_null()
|| (hash.is_null() && vdso.gnu_hash.is_null())
{
return None; // Failed
}

Expand All @@ -192,10 +216,21 @@ fn init_from_sysinfo_ehdr() -> Option<Vdso> {
}

// Parse the hash table header.
vdso.nbucket = *hash.add(0);
//vdso.nchain = *hash.add(1);
vdso.bucket = hash.add(2);
vdso.chain = hash.add(vdso.nbucket as usize + 2);
if !vdso.gnu_hash.is_null() {
vdso.nbucket = ElfHashEntry::from(*vdso.gnu_hash);
// The bucket array is located after the header (4 uint32) and the bloom
// filter (size_t array of gnu_hash[2] elements).
vdso.bucket = vdso
.gnu_hash
.add(4)
.add(size_of::<c::size_t>() / 4 * *vdso.gnu_hash.add(2) as usize)
.cast();
} else {
vdso.nbucket = *hash.add(0);
//vdso.nchain = *hash.add(1);
vdso.bucket = hash.add(2);
vdso.chain = hash.add(vdso.nbucket as usize + 2);
}

// That's all we need.
Some(vdso)
Expand Down Expand Up @@ -261,49 +296,110 @@ impl Vdso {
&& (name == CStr::from_ptr(self.symstrings.add(aux.vda_name as usize).cast()))
}

/// Decide whether `sym`, located at symbol-table index `i`, is the symbol
/// identified by `name` and `version`.
///
/// Returns `true` only when the symbol's type is `STT_FUNC` or
/// `STT_NOTYPE`, its binding is `STB_GLOBAL` or `STB_WEAK`, its name in the
/// string table equals `name`, and, if a `versym` table is present, its
/// version entry matches `version` / `ver_hash`.
///
/// # Safety
///
/// The raw pointers inside `self` must be valid.
unsafe fn check_sym(
    &self,
    sym: &Elf_Sym,
    i: ElfHashEntry,
    name: &CStr,
    version: &CStr,
    ver_hash: u32,
) -> bool {
    // We want a defined global or weak function with the right name.
    //
    // `STT_NOTYPE` is accepted alongside `STT_FUNC` for the symbol type,
    // for compatibility with some versions of Linux on PowerPC64. See
    // [this commit] in Linux for more background.
    //
    // [this commit]: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/tools/testing/selftests/vDSO/parse_vdso.c?id=0161bd38c24312853ed5ae9a425a1c41c4ac674a
    let sym_type = ELF_ST_TYPE(sym.st_info);
    if !(sym_type == STT_FUNC || sym_type == STT_NOTYPE) {
        return false;
    }

    let sym_bind = ELF_ST_BIND(sym.st_info);
    if !(sym_bind == STB_GLOBAL || sym_bind == STB_WEAK) {
        return false;
    }

    // Only read the string table once the cheap header checks have passed.
    let sym_name: &CStr = CStr::from_ptr(self.symstrings.add(sym.st_name as usize).cast());
    if name != sym_name {
        return false;
    }

    // With no `versym` table there is nothing further to check; otherwise
    // the symbol's version entry has to match.
    self.versym.is_null() || self.match_version(*self.versym.add(i as usize), version, ver_hash)
}

/// Look up a symbol in the vDSO.
pub(super) fn sym(&self, version: &CStr, name: &CStr) -> *mut c::c_void {
let ver_hash = elf_hash(version);
let name_hash = elf_hash(name);

// SAFETY: The pointers in `self` must be valid.
unsafe {
let mut chain = *self
.bucket
.add((ElfHashEntry::from(name_hash) % self.nbucket) as usize);

while chain != ElfHashEntry::from(STN_UNDEF) {
let sym = &*self.symtab.add(chain as usize);

// Check for a defined global or weak function w/ right name.
//
// Accept `STT_NOTYPE` in addition to `STT_FUNC` for the symbol
// type, for compatibility with some versions of Linux on
// PowerPC64. See [this commit] in Linux for more background.
//
// [this commit]: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/tools/testing/selftests/vDSO/parse_vdso.c?id=0161bd38c24312853ed5ae9a425a1c41c4ac674a
if (ELF_ST_TYPE(sym.st_info) != STT_FUNC &&
ELF_ST_TYPE(sym.st_info) != STT_NOTYPE)
|| (ELF_ST_BIND(sym.st_info) != STB_GLOBAL
&& ELF_ST_BIND(sym.st_info) != STB_WEAK)
|| sym.st_shndx == SHN_UNDEF
|| sym.st_shndx == SHN_ABS
|| ELF_ST_VISIBILITY(sym.st_other) != STV_DEFAULT
|| (name != CStr::from_ptr(self.symstrings.add(sym.st_name as usize).cast()))
// Check symbol version.
|| (!self.versym.is_null()
&& !self.match_version(*self.versym.add(chain as usize), version, ver_hash))
{
chain = *self.chain.add(chain as usize);
continue;
if !self.gnu_hash.is_null() {
let mut h1: u32 = gnu_hash(name);

// Changes to fix the pointer arithmetic on s390x: cast
// `self.bucket` to `*const u32` here, because even though
// s390x's `ElfHashEntry` is 64-bit for `DT_HASH` tables,
// it uses 32-bit entries for `DT_GNU_HASH` tables.
let mut i = *self
.bucket
.cast::<u32>()
.add((ElfHashEntry::from(h1) % self.nbucket) as usize);
if i == 0 {
return null_mut();
}
h1 |= 1;
// Changes to fix the pointer arithmetic on s390x: As above,
// cast `self.bucket` to `*const u32`.
let mut hashval = self
.bucket
.cast::<u32>()
.add(self.nbucket as usize)
.add((i - *self.gnu_hash.add(1)) as usize);
loop {
let sym: &Elf_Sym = &*self.symtab.add(i as usize);
let h2 = *hashval;
hashval = hashval.add(1);
if h1 == (h2 | 1)
&& self.check_sym(sym, ElfHashEntry::from(i), name, version, ver_hash)
{
let sum = self.addr_from_elf(sym.st_value).unwrap();
assert!(
sum as usize >= self.load_addr as usize
&& sum as usize <= self.load_end as usize
);
return sum as *mut c::c_void;
}
if (h2 & 1) != 0 {
break;
}
i += 1;
}
} else {
let mut i = *self
.bucket
.add((ElfHashEntry::from(elf_hash(name)) % self.nbucket) as usize);
while i != 0 {
let sym: &Elf_Sym = &*self.symtab.add(i as usize);
if sym.st_shndx != SHN_UNDEF && self.check_sym(sym, i, name, version, ver_hash)
{
let sum = self.addr_from_elf(sym.st_value).unwrap();
assert!(
sum as usize >= self.load_addr as usize
&& sum as usize <= self.load_end as usize
);
return sum as *mut c::c_void;
}
i = *self.chain.add(i as usize);
}

let sum = self.addr_from_elf(sym.st_value).unwrap();
assert!(
sum as usize >= self.load_addr as usize
&& sum as usize <= self.load_end as usize
);
return sum as *mut c::c_void;
}
}

Expand All @@ -324,32 +420,103 @@ impl Vdso {
}
}

// Smoke test for vDSO parsing: build a `Vdso` for the current process,
// check that the symbol and string tables were found, and check that
// lookups of well-known versioned symbols return non-null pointers. The
// version string and symbol name differ per architecture, hence the
// per-`target_arch` `cfg` on each lookup.
//
// NOTE(review): the unconditional `#[ignore]` below sits next to a
// MIPS-only `cfg_attr(..., ignore)`; this looks like the old and new sides
// of a diff side by side — confirm which attribute should remain.
//
// Disable on MIPS since QEMU on MIPS doesn't provide a vDSO.
#[cfg(linux_raw)]
#[test]
#[ignore] // Until rustix is updated to the new vDSO format.
#[cfg_attr(any(target_arch = "mips", target_arch = "mips64"), ignore)]
fn test_vdso() {
let vdso = Vdso::new().unwrap();
assert!(!vdso.symtab.is_null());
assert!(!vdso.symstrings.is_null());

// NOTE(review): this `clock_gettime` lookup is repeated verbatim inside
// the first braced block below — presumably the pre-change copy; confirm
// and keep only one.
#[cfg(target_arch = "x86_64")]
let ptr = vdso.sym(cstr!("LINUX_2.6"), cstr!("__vdso_clock_gettime"));
#[cfg(target_arch = "arm")]
let ptr = vdso.sym(cstr!("LINUX_2.6"), cstr!("__vdso_clock_gettime64"));
#[cfg(target_arch = "aarch64")]
let ptr = vdso.sym(cstr!("LINUX_2.6.39"), cstr!("__kernel_clock_gettime"));
#[cfg(target_arch = "x86")]
let ptr = vdso.sym(cstr!("LINUX_2.6"), cstr!("__vdso_clock_gettime64"));
#[cfg(target_arch = "riscv64")]
let ptr = vdso.sym(cstr!("LINUX_4.15"), cstr!("__vdso_clock_gettime"));
#[cfg(target_arch = "powerpc64")]
let ptr = vdso.sym(cstr!("LINUX_2.6.15"), cstr!("__kernel_clock_gettime"));
#[cfg(target_arch = "s390x")]
let ptr = vdso.sym(cstr!("LINUX_2.6.29"), cstr!("__kernel_clock_gettime"));
#[cfg(any(target_arch = "mips", target_arch = "mips32r6"))]
let ptr = vdso.sym(cstr!("LINUX_2.6"), cstr!("__vdso_clock_gettime64"));
#[cfg(any(target_arch = "mips64", target_arch = "mips64r6"))]
let ptr = vdso.sym(cstr!("LINUX_2.6"), cstr!("__vdso_clock_gettime"));

assert!(!ptr.is_null());
// Look up this architecture's `clock_gettime` entry point.
{
#[cfg(target_arch = "x86_64")]
let ptr = vdso.sym(cstr!("LINUX_2.6"), cstr!("__vdso_clock_gettime"));
#[cfg(target_arch = "arm")]
let ptr = vdso.sym(cstr!("LINUX_2.6"), cstr!("__vdso_clock_gettime64"));
#[cfg(target_arch = "aarch64")]
let ptr = vdso.sym(cstr!("LINUX_2.6.39"), cstr!("__kernel_clock_gettime"));
#[cfg(target_arch = "x86")]
let ptr = vdso.sym(cstr!("LINUX_2.6"), cstr!("__vdso_clock_gettime64"));
#[cfg(target_arch = "riscv64")]
let ptr = vdso.sym(cstr!("LINUX_4.15"), cstr!("__vdso_clock_gettime"));
#[cfg(target_arch = "powerpc64")]
let ptr = vdso.sym(cstr!("LINUX_2.6.15"), cstr!("__kernel_clock_gettime"));
#[cfg(target_arch = "s390x")]
let ptr = vdso.sym(cstr!("LINUX_2.6.29"), cstr!("__kernel_clock_gettime"));
#[cfg(any(target_arch = "mips", target_arch = "mips32r6"))]
let ptr = vdso.sym(cstr!("LINUX_2.6"), cstr!("__vdso_clock_gettime64"));
#[cfg(any(target_arch = "mips64", target_arch = "mips64r6"))]
let ptr = vdso.sym(cstr!("LINUX_2.6"), cstr!("__vdso_clock_gettime"));

assert!(!ptr.is_null());
}

// Look up this architecture's `clock_getres` entry point.
{
#[cfg(target_arch = "x86_64")]
let ptr = vdso.sym(cstr!("LINUX_2.6"), cstr!("__vdso_clock_getres"));
#[cfg(target_arch = "arm")]
let ptr = vdso.sym(cstr!("LINUX_2.6"), cstr!("__vdso_clock_getres"));
#[cfg(target_arch = "aarch64")]
let ptr = vdso.sym(cstr!("LINUX_2.6.39"), cstr!("__kernel_clock_getres"));
#[cfg(target_arch = "x86")]
let ptr = vdso.sym(cstr!("LINUX_2.6"), cstr!("__vdso_clock_getres"));
#[cfg(target_arch = "riscv64")]
let ptr = vdso.sym(cstr!("LINUX_4.15"), cstr!("__vdso_clock_getres"));
#[cfg(target_arch = "powerpc64")]
let ptr = vdso.sym(cstr!("LINUX_2.6.15"), cstr!("__kernel_clock_getres"));
#[cfg(target_arch = "s390x")]
let ptr = vdso.sym(cstr!("LINUX_2.6.29"), cstr!("__kernel_clock_getres"));
#[cfg(any(target_arch = "mips", target_arch = "mips32r6"))]
let ptr = vdso.sym(cstr!("LINUX_2.6"), cstr!("__vdso_clock_getres"));
#[cfg(any(target_arch = "mips64", target_arch = "mips64r6"))]
let ptr = vdso.sym(cstr!("LINUX_2.6"), cstr!("__vdso_clock_getres"));

assert!(!ptr.is_null());
}

// Look up this architecture's `gettimeofday` entry point.
{
#[cfg(target_arch = "x86_64")]
let ptr = vdso.sym(cstr!("LINUX_2.6"), cstr!("__vdso_gettimeofday"));
#[cfg(target_arch = "arm")]
let ptr = vdso.sym(cstr!("LINUX_2.6"), cstr!("__vdso_gettimeofday"));
#[cfg(target_arch = "aarch64")]
let ptr = vdso.sym(cstr!("LINUX_2.6.39"), cstr!("__kernel_gettimeofday"));
#[cfg(target_arch = "x86")]
let ptr = vdso.sym(cstr!("LINUX_2.6"), cstr!("__vdso_gettimeofday"));
#[cfg(target_arch = "riscv64")]
let ptr = vdso.sym(cstr!("LINUX_4.15"), cstr!("__vdso_gettimeofday"));
#[cfg(target_arch = "powerpc64")]
let ptr = vdso.sym(cstr!("LINUX_2.6.15"), cstr!("__kernel_gettimeofday"));
#[cfg(target_arch = "s390x")]
let ptr = vdso.sym(cstr!("LINUX_2.6.29"), cstr!("__kernel_gettimeofday"));
#[cfg(any(target_arch = "mips", target_arch = "mips32r6"))]
let ptr = vdso.sym(cstr!("LINUX_2.6"), cstr!("__vdso_gettimeofday"));
#[cfg(any(target_arch = "mips64", target_arch = "mips64r6"))]
let ptr = vdso.sym(cstr!("LINUX_2.6"), cstr!("__vdso_gettimeofday"));

assert!(!ptr.is_null());
}

// The `getcpu` lookup is compiled only for the architectures listed in
// the `cfg` below.
#[cfg(any(
target_arch = "x86_64",
target_arch = "x86",
target_arch = "riscv64",
target_arch = "powerpc64",
target_arch = "s390x",
))]
{
#[cfg(target_arch = "x86_64")]
let ptr = vdso.sym(cstr!("LINUX_2.6"), cstr!("__vdso_getcpu"));
#[cfg(target_arch = "x86")]
let ptr = vdso.sym(cstr!("LINUX_2.6"), cstr!("__vdso_getcpu"));
#[cfg(target_arch = "riscv64")]
let ptr = vdso.sym(cstr!("LINUX_4.15"), cstr!("__vdso_getcpu"));
#[cfg(target_arch = "powerpc64")]
let ptr = vdso.sym(cstr!("LINUX_2.6.15"), cstr!("__kernel_getcpu"));
#[cfg(target_arch = "s390x")]
let ptr = vdso.sym(cstr!("LINUX_2.6.29"), cstr!("__kernel_getcpu"));

assert!(!ptr.is_null());
}
}
Loading

0 comments on commit 2268402

Please sign in to comment.