cyphar · cyphar · Feb 12, 2026 · Feb 11, 2026 · Feb 12, 2026 · Feb 11, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -34,8 +34,22 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
   not a leak) but tools that search for file descriptor leaks (such as runc's
   test suite) could incorrectly classify this as a leak. We now close this
   `ProcfsBase` handle far more aggressively.
+- RHEL 8 kernels have backports of the fd-based mount API (`fsopen(2)`,
+  `open_tree(2)`, et al.) but some `runc` testing found that they have very bad
+  (and very difficult to debug) performance issues. Thus, to avoid broken
+  backports libpathrs will now explicitly refuse to use the fd-based mount API
+  if the reported kernel version is pre-5.2 and will instead fallback to the
+  less-secure `open("/proc")`.
+- libpathrs [0.2.0][] added some `fdinfo`-based hardening to the procfs
+  resolver when `openat2` is not available. Unfortunately, one aspect of this
+  hardening had a hard requirement on [a kernel feature only added in Linux
+  5.14][kcommit-3845f256a8b52] (namely the `ino` field in `fdinfo`) and thus
+  inadvertently increased our minimum kernel version requirement quite
+  significantly. This additional hardening is now only treated as mandatory if
+  the host kernel version is Linux 5.14 or newer.
 
 [rust-issue20267]: https://github.com/rust-lang/rust/issues/20267
+[kcommit-3845f256a8b52]: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=3845f256a8b527127bfbd4ced21e93d9e89aa6d7
 
 ## [0.2.3] - 2026-01-29 ##
 

diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -75,7 +75,7 @@ once_cell = "1"
 # MSRV(1.65): Update to >=0.4.1 which uses let_else. 0.4.0 was broken.
 open-enum = { version = "0.3", optional = true }
 rand = { version = "0.9", optional = true }
-rustix = { version = "1.1", features = ["fs", "process", "thread", "mount"] }
+rustix = { version = "1.1", features = ["fs", "process", "system", "thread", "mount"] }
 rustversion = "1"
 thiserror = "2"
 static_assertions = "1.1"
@@ -89,6 +89,7 @@ tempfile = "3"
 paste = "1"
 path-clean = "1"
 pretty_assertions = { version = "1.4.1", features = ["unstable"] }
+scopeguard = "1"
 
 [build-dependencies]
 tempfile = "3"

diff --git a/src/error.rs b/src/error.rs
@@ -92,7 +92,7 @@ pub(crate) enum ErrorImpl {
     #[error("feature {feature} is not implemented")]
     NotImplemented { feature: Cow<'static, str> },
 
-    #[error("feature {feature} not supported on this kernel")]
+    #[error("feature {feature} not supported by the system")]
     NotSupported { feature: Cow<'static, str> },
 
     #[error("invalid {name} argument: {description}")]
@@ -220,7 +220,7 @@ impl ErrorKind {
     /// errno values where appropriate.
     pub(crate) fn errno(&self) -> Option<i32> {
         match self {
-            ErrorKind::NotImplemented => Some(libc::ENOSYS),
+            ErrorKind::NotImplemented | ErrorKind::NotSupported => Some(libc::ENOSYS),
             ErrorKind::InvalidArgument => Some(libc::EINVAL),
             #[cfg(feature = "capi")]
             ErrorKind::UnsupportedStructureData => Some(libc::E2BIG),

diff --git a/src/procfs.rs b/src/procfs.rs
@@ -63,7 +63,7 @@
     flags::{OpenFlags, ResolverFlags},
     resolvers::procfs::ProcfsResolver,
     syscalls,
-    utils::{self, FdExt, MaybeOwnedFd, RawProcfsRoot},
+    utils::{self, kernel_version, FdExt, MaybeOwnedFd, RawProcfsRoot},
 };
 
 use std::{
@@ -76,7 +76,7 @@
     path::{Path, PathBuf},
 };
 
-use once_cell::sync::OnceCell as OnceLock;
+use once_cell::sync::{Lazy, OnceCell as OnceLock};
 use rustix::{
     fs::{self as rustix_fs, Access, AtFlags},
     mount::{FsMountFlags, FsOpenFlags, MountAttrFlags, OpenTreeFlags},
@@ -232,9 +232,6 @@
     subset_pid: bool,
 }
 
-// MSRV(1.70): Use std::sync::OnceLock.
-static CACHED_PROCFS_HANDLE: OnceLock<OwnedFd> = OnceLock::new();
-
 impl Default for ProcfsHandleBuilder {
     fn default() -> Self {
         Self::new()
@@ -341,6 +338,9 @@
     /// panic as this is not a state that should be possible to reach in regular
     /// program execution.
     pub fn build(self) -> Result<ProcfsHandle, Error> {
+        // MSRV(1.70): Use std::sync::OnceLock.
+        static CACHED_PROCFS_HANDLE: OnceLock<OwnedFd> = OnceLock::new();
+
         // MSRV(1.85): Use let chain here (Rust 2024).
         if self.is_cache_friendly() {
             // If there is already a cached filesystem available, use that.
@@ -806,11 +806,26 @@
 /// [lwn-procfs-overmounts]: https://lwn.net/Articles/934460/
 pub type ProcfsHandle = ProcfsHandleRef<'static>;
 
+/// Indicates whether this kernel is new enough that it should have the
+/// upstream-merged version of the new mount API. This is necessary because
+/// testing in runc found that RHEL 8 appears to have a broken backport of the
+/// new mount API that causes serious performance regressions -- as such, we
+/// should simply refuse to even try to use any of the new mount APIs on pre-5.2
+/// kernels.
+// MSRV(1.80): Use LazyLock.
+static HAS_UNBROKEN_MOUNT_API: Lazy<bool> = Lazy::new(|| kernel_version::is_gte!(5, 2));
+
 impl ProcfsHandle {
     /// Create a new `fsopen(2)`-based [`ProcfsHandle`]. This handle is safe
     /// against racing attackers changing the mount table and is guaranteed to
     /// have no overmounts because it is a brand-new procfs.
     pub(crate) fn new_fsopen(subset: bool) -> Result<Self, Error> {
+        if !*HAS_UNBROKEN_MOUNT_API {
+            Err(ErrorImpl::NotSupported {
+                feature: "fsopen".into(),
+            })?
+        }
+
         let sfd = syscalls::fsopen("proc", FsOpenFlags::FSOPEN_CLOEXEC).map_err(|err| {
             ErrorImpl::RawOsError {
                 operation: "create procfs suberblock".into(),
@@ -852,6 +867,12 @@
     /// guaranteed to be safe against racing attackers, and will not have
     /// overmounts unless `flags` contains `OpenTreeFlags::AT_RECURSIVE`.
     pub(crate) fn new_open_tree(flags: OpenTreeFlags) -> Result<Self, Error> {
+        if !*HAS_UNBROKEN_MOUNT_API {
+            Err(ErrorImpl::NotSupported {
+                feature: "open_tree".into(),
+            })?
+        }
+
         syscalls::open_tree(
             syscalls::BADFD,
             "/proc",

diff --git a/src/syscalls.rs b/src/syscalls.rs
@@ -874,6 +874,36 @@ pub(crate) mod openat2 {
 
 pub(crate) use openat2::{openat2, openat2_follow, OpenHow, ResolveFlags};
 
+#[cfg(test)]
+mod personality {
+    // musl doesn't expose UNAME26.
+    #[cfg(not(target_env = "musl"))]
+    pub(crate) const PER_UNAME26: u32 = libc::UNAME26 as _;
+    #[cfg(target_env = "musl")]
+    pub(crate) const PER_UNAME26: u32 = 0x0020000; /* <linux/personality.h> */
+
+    pub(crate) fn personality(persona: Option<u32>) -> u32 {
+        unsafe { libc::personality(persona.unwrap_or(0xFFFF_FFFF) as _) as _ }
+    }
+
+    /// Temporarily change the personality of the running thread.
+    ///
+    /// The personality is reset to the original persona value (i.e., when
+    /// [`scoped_personality`] was first called) once the returned `impl Drop`
+    /// value is dropped. Note that any threads or subprocesses spawned with
+    /// the `scoped_personality` guard held will permanently inherit the
+    /// specified persona.
+    #[must_use]
+    pub(crate) fn scoped_personality(persona: u32) -> impl Drop {
+        scopeguard::guard(personality(Some(persona)), |old_persona| {
+            personality(Some(old_persona));
+        })
+    }
+}
+
+#[cfg(test)]
+pub(crate) use personality::*;
+
 #[cfg(test)]
 pub(crate) fn getpid() -> rustix_process::RawPid {
     rustix_process::Pid::as_raw(Some(rustix_process::getpid()))

diff --git a/src/utils.rs b/src/utils.rs
@@ -52,3 +52,5 @@ pub(crate) use maybe_owned::*;
 
 mod raw_procfs;
 pub(crate) use raw_procfs::*;
+
+pub(crate) mod kernel_version;
diff --git a/src/utils/fd.rs b/src/utils/fd.rs
@@ -317,9 +317,9 @@ impl<Fd: AsFd> FdExt for Fd {
     {
         let fd = self.as_fd();
         let fdinfo_path = match fd.as_raw_fd() {
-            // MSRV(1.66): Use ..=0 (half_open_range_patterns).
+            // MSRV(1.66): Use ..=-1 (half_open_range_patterns).
             // MSRV(1.80): Use ..0 (exclusive_range_pattern).
-            fd @ libc::AT_FDCWD | fd @ RawFd::MIN..=0 => Err(ErrorImpl::OsError {
+            fd @ libc::AT_FDCWD | fd @ RawFd::MIN..=-1 => Err(ErrorImpl::OsError {
                 operation: format!("get relative procfs fdinfo path for fd {fd}").into(),
                 source: IOError::from_raw_os_error(libc::EBADF),
             })?,

diff --git a/src/utils/fdinfo.rs b/src/utils/fdinfo.rs
@@ -32,7 +32,7 @@
 
 use crate::{
     error::{Error, ErrorExt, ErrorImpl, ErrorKind},
-    utils::FdExt,
+    utils::{kernel_version, FdExt},
 };
 
 use std::{
@@ -100,44 +100,60 @@
 {
     let fd = fd.as_fd();
 
-    // Verify that the "ino" field in fdinfo matches the real inode number
-    // of our file descriptor. This makes attacks harder (if not near
-    // impossible, outside of very constrained situations):
+    // Verify that the "ino" field in fdinfo matches the real inode number of
+    // our file descriptor. This makes attacks harder (if not near impossible,
+    // outside of very constrained situations):
     //
-    //  * An attacker would probably struggle to always accurately guess the inode
-    //    number of files that the process is trying to operate on. Yes, if they
-    //    know the victim process's access patterns of procfs they could probably
-    //    make an educated guess, but most files do not have stable inode numbers in
-    //    procfs.
+    // * An attacker would probably struggle to always accurately guess the inode
+    //   number of files that the process is trying to operate on. Yes, if they know
+    //   the victim process's access patterns of procfs they could probably make an
+    //   educated guess, but most files do not have stable inode numbers in procfs.
     //
-    //  * An attacker can no longer bind-mount their own fdinfo directory with just
-    //    a buch of handles to "/proc" open (assuming the attacker is trying to
-    //    spoof "mnt_id"), because the inode numbers won't match.
+    // * An attacker can no longer bind-mount their own fdinfo directory with just a
+    //   buch of handles to "/proc" open (assuming the attacker is trying to spoof
+    //   "mnt_id"), because the inode numbers won't match.
     //
-    //    They also can't really fake inode numbers in real procfs fdinfo
-    //    files, so they would need to create fake fdinfo files using
-    //    individual file arbitrary-data gadgets (like /proc/self/environ).
-    //    However, every program only has one environment so they would need
-    //    to create a new child process for every fd they are trying to
-    //    attack simultaneously (and accurately update their environment
-    //    data to avoid detection).
+    //   They also can't really fake inode numbers in real procfs fdinfo files,
+    //   so they would need to create fake fdinfo files using individual file
+    //   arbitrary-data gadgets (like /proc/self/environ). However, every
+    //   program only has one environment so they would need to create a new
+    //   child process for every fd they are trying to attack simultaneously
+    //   (and accurately update their environment data to avoid detection).
     //
-    // This isn't perfect protection by any means, but it's probably the
-    // best we can do for very old kernels (given the constraints). At the very
-    // least, it makes exploitation _much_ harder than if we didn't do anything
-    // at all.
+    // This isn't perfect protection by any means, but it's probably the best we
+    // can do for very old kernels (given the constraints). At the very least,
+    // it makes exploitation _much_ harder than if we didn't do anything at all.
     let actual_ino: u64 = fd.metadata().wrap("get inode number of fd")?.ino();
-    let fdinfo_ino: u64 =
+    let fdinfo_ino: Option<u64> =
         match parse_and_find_fdinfo_field(rdr, "ino").map_err(|err| (err.kind(), err)) {
-            Ok(Some(ino)) => Ok(ino),
-            // "ino" *must* exist as a field -- make sure we return a
-            // SafetyViolation here if it is missing or an invalid value
-            // (InternalError), otherwise an attacker could silence this check
-            // by creating a "ino"-less fdinfo.
+            Ok(Some(ino)) => Ok(Some(ino)),
+            Ok(None) => {
+                // Unfortunately, the "ino" field in fdinfo was only added in
+                // Linux 5.14 (see kcommit 3845f256a8b52 ("procfs/dmabuf: add
+                // inode number to /proc/*/fdinfo")) and so we cannot require
+                // this for such old kernels.
+                //
+                // However, *for post-5.14 kernels*, "ino" *must* exist as a
+                // field, so make sure we return a SafetyViolation here if it is
+                // missing. Otherwise an attacker could silence this check by
+                // creating a "ino"-less fdinfo.
+                //
+                // [kcommit-3845f256a8b52]: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=3845f256a8b527127bfbd4ced21e93d9e89aa6d7
+                if kernel_version::is_gte!(5, 14) {
+                    Err(ErrorImpl::SafetyViolation {
+                        description: format!(
+                            r#"fd {:?} has a fake fdinfo: missing "ino" field"#,
+                            fd.as_raw_fd(),
+                        )
+                        .into(),
+                    })?;
+                }
+                Ok(None)
+            }
             // TODO: Should we actually match for ErrorImpl::ParseIntError here?
-            Ok(None) | Err((ErrorKind::InternalError, _)) => Err(ErrorImpl::SafetyViolation {
+            Err((ErrorKind::InternalError, _)) => Err(ErrorImpl::SafetyViolation {
                 description: format!(
-                    r#"fd {:?} has a fake fdinfo: invalid or missing "ino" field"#,
+                    r#"fd {:?} has a fake fdinfo: invalid "ino" field"#,
                     fd.as_raw_fd(),
                 )
                 .into(),
@@ -146,14 +162,17 @@
             // Pass through any other errors.
             Err((_, err)) => Err(err),
         }?;
-    if actual_ino != fdinfo_ino {
-        Err(ErrorImpl::SafetyViolation {
+    // MSRV(1.85): Use let chain here (Rust 2024).
+    if let Some(fdinfo_ino) = fdinfo_ino {
+        if actual_ino != fdinfo_ino {
+            Err(ErrorImpl::SafetyViolation {
                 description: format!(
                     "fd {:?} has a fake fdinfo: wrong inode number (ino is {fdinfo_ino:X} not {actual_ino:X})",
                     fd.as_raw_fd()
                 )
                 .into(),
             })?;
+        }
     }
 
     // Reset the position in the fdinfo file, and re-parse it to look for
@@ -169,7 +188,7 @@
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::error::ErrorKind;
+    use crate::{error::ErrorKind, syscalls};
 
     use std::{
         fmt::Debug,
@@ -528,9 +547,13 @@
         Ok(())
     }
 
-    // Make sure that a missing "ino" entry also fails.
+    // Make sure that a missing "ino" entry also fails on new kernels.
     #[test]
-    fn fd_get_verify_fdinfo_no_ino() -> Result<(), Error> {
+    fn fd_get_verify_fdinfo_no_ino_kernel514() -> Result<(), Error> {
+        if !kernel_version::is_gte!(5, 14) {
+            return Ok(());
+        }
+
         const FAKE_FDINFO: &[u8] = indoc! {b"
             foo: abcdef
             mnt_id: 12345
@@ -565,6 +588,42 @@
         Ok(())
     }
 
+    // Make sure that a missing "ino" entry succeeds on old kernels (emulated).
+    #[test]
+    fn fd_get_verify_fdinfo_no_ino_oldkernel() -> Result<(), Error> {
+        // The UNAME26 personality lets us fake a pre-5.14 kernel version to
+        // kernel_version::is_gte.
+        let _persona_guard = syscalls::scoped_personality(syscalls::PER_UNAME26);
+
+        const FAKE_FDINFO: &[u8] = indoc! {b"
+            foo: abcdef
+            mnt_id: 12345
+        "};
+
+        let file = File::open(".").context("open dummy file")?;
+
+        check_fd_get_verify_fdinfo::<u64>(
+            &mut Cursor::new(&FAKE_FDINFO),
+            &file,
+            "mnt_id",
+            Ok(Some(12345)),
+        )
+        .expect(r#"get "mnt_id" from fdinfo with missing ino (pre-5.14)"#);
+
+        check_fd_get_verify_fdinfo::<u64>(&mut Cursor::new(&FAKE_FDINFO), &file, "ino", Ok(None))
+            .expect(r#"get "ino" from fdinfo with missing ino (pre-5.14)"#);
+
+        check_fd_get_verify_fdinfo::<String>(
+            &mut Cursor::new(&FAKE_FDINFO),
+            &file,
+            "non_exist",
+            Ok(None),
+        )
+        .expect(r#"get "non_exist" from fdinfo with missing ino (pre-5.14)"#);
+
+        Ok(())
+    }
+
     // Make sure that an "ino" entry with the wrong type results in a
     // SafetyViolation error, not an integer parsing error.
     #[test]
Original file line number	Diff line number	Diff line change
Expand Up		@@ -52,3 +52,5 @@ pub(crate) use maybe_owned::*;

		mod raw_procfs;
		pub(crate) use raw_procfs::*;

		pub(crate) mod kernel_version;