diff --git a/accounts-db/src/utils.rs b/accounts-db/src/utils.rs index fb0429f7b26..6152f7ffdd7 100644 --- a/accounts-db/src/utils.rs +++ b/accounts-db/src/utils.rs @@ -1,5 +1,7 @@ use { - agave_fs::dirs, + crate::accounts_db::AccountsDbConfig, + agave_fs::{dirs, metadata::DirectIoSupport}, + itertools::Itertools as _, log::*, solana_account::{AccountSharedData, ReadableAccount}, solana_measure::measure_time, @@ -163,6 +165,42 @@ pub fn create_account_shared_data(account: &impl ReadableAccount) -> AccountShar ) } +/// Check that given paths conform to requirements defined by `config`. +/// +/// Return `Err` if paths are impossible to access or do not support required features. +/// +/// This functions validates that paths reside on filesystem supporting configured operations +/// like direct-io. This allows providing meaningful error messages to user during startup +/// instead of generating hard to diagnose errors during runtime. +pub fn validate_account_paths_for_direct_io( + config: &AccountsDbConfig, + accounts_paths: &[PathBuf], + account_snapshot_paths: &[PathBuf], +) -> io::Result<()> { + if config.snapshots_use_direct_io { + let mut unsupported_paths = vec![]; + for path in accounts_paths.iter().chain(account_snapshot_paths.iter()) { + if agave_fs::metadata::check_direct_io_capability(path)? == DirectIoSupport::Unsupported + { + unsupported_paths.push(path); + } + } + if !unsupported_paths.is_empty() { + let paths_str = unsupported_paths.into_iter().map(|p| p.display()).join(","); + return Err(io::Error::new( + io::ErrorKind::Unsupported, + format!( + "direct-io (O_DIRECT) is not supported for paths `{paths_str}`. Ensure the \ + filesystem hosting that path supports direct-io, or disable direct-io with \ + --no-accounts-db-snapshots-direct-io flag.", + ), + )); + } + } + + Ok(()) +} + #[cfg(test)] mod tests { use {super::*, tempfile::TempDir}; diff --git a/core/src/validator.rs b/core/src/validator.rs index ebae2e98ee2..58f1d5ea1af 100644 --- a/core/src/validator.rs +++ b/core/src/validator.rs @@ -51,7 +51,7 @@ use { solana_accounts_db::{ accounts_db::{ACCOUNTS_DB_CONFIG_FOR_TESTING, AccountsDbConfig}, accounts_update_notifier_interface::AccountsUpdateNotifier, - utils::move_and_async_delete_path_contents, + utils::{move_and_async_delete_path_contents, validate_account_paths_for_direct_io}, }, solana_client::connection_cache::{ConnectionCache, Protocol}, solana_clock::Slot, @@ -831,12 +831,13 @@ impl Validator { let genesis_config = load_genesis(config, ledger_path)?; metrics_config_sanity_check(genesis_config.cluster_type)?; - info!("Cleaning accounts paths.."); + info!("Validating and cleaning accounts paths.."); *start_progress.write().unwrap() = ValidatorStartProgress::CleaningAccounts; - let mut timer = Measure::start("clean_accounts_paths"); + let mut timer = Measure::start("validate_and_clean_accounts_paths"); + validate_account_paths(config)?; cleanup_accounts_paths(config); timer.stop(); - info!("Cleaning accounts paths done. {timer}"); + info!("Validating and cleaning accounts paths done. {timer}"); snapshot_utils::purge_incomplete_bank_snapshots(&config.snapshot_config.bank_snapshots_dir); snapshot_utils::purge_old_bank_snapshots_at_startup( @@ -2920,6 +2921,14 @@ fn cleanup_accounts_paths(config: &ValidatorConfig) { } } +fn validate_account_paths(config: &ValidatorConfig) -> std::io::Result<()> { + validate_account_paths_for_direct_io( + &config.accounts_db_config, + &config.account_paths, + &config.account_snapshot_paths, + ) +} + pub fn is_snapshot_config_valid(snapshot_config: &SnapshotConfig) -> bool { // if the snapshot config is configured to *not* take snapshots, then it is valid if !snapshot_config.should_generate_snapshots() { diff --git a/fs/src/lib.rs b/fs/src/lib.rs index c659cfd733c..5163f35d903 100644 --- a/fs/src/lib.rs +++ b/fs/src/lib.rs @@ -14,6 +14,7 @@ mod file_info; pub mod file_io; pub mod io_setup; mod io_uring; +pub mod metadata; pub use file_info::FileInfo; diff --git a/fs/src/metadata.rs b/fs/src/metadata.rs new file mode 100644 index 00000000000..37833d984ad --- /dev/null +++ b/fs/src/metadata.rs @@ -0,0 +1,205 @@ +/// Utilities for querying filesystem metadata, including direct I/O support. +#[cfg(all(target_os = "linux", not(target_env = "musl")))] +use std::{ffi::CString, mem}; +#[cfg(target_os = "linux")] +use std::{fs, path::PathBuf}; +use std::{io, path::Path}; + +/// Indicates whether a filesystem path supports direct I/O (opening files with `O_DIRECT` flag). +#[derive(Debug, PartialEq, Eq)] +pub enum DirectIoSupport { + /// The filesystem does not support direct I/O. + Unsupported, + /// The filesystem supports direct I/O. + Supported, + /// Support could not be determined (e.g. check is not implemented on this platform). + Uncertain, +} + +/// Returns whether `path` (a file or directory) resides on a filesystem that supports +/// direct I/O (`O_DIRECT`). +/// +/// Returns `Ok(Supported)` if direct I/O is supported, `Ok(Unsupported)` if it is not, +/// or `Ok(Uncertain)` if no conclusion can be drawn (e.g. an empty directory or a path +/// that does not exist). +/// +/// On Linux: resolves a concrete file under `path` first, then attempts a `statx(2)`-based +/// check; if the kernel does not support it (< 6.1), falls back to an open-probe check. +/// On non-Linux platforms: always returns `Ok(Uncertain)`. +#[cfg(target_os = "linux")] +pub fn check_direct_io_capability(path: impl AsRef) -> io::Result { + let Some(file) = find_any_file_under_path(path.as_ref())? else { + return Ok(DirectIoSupport::Uncertain); + }; + // statx with STATX_DIOALIGN is the preferred check, but libc does not expose + // statx on musl (requires musl >= 1.2.3), so skip it there. + #[cfg(not(target_env = "musl"))] + { + let statx_result = check_direct_io_via_statx(&file); + if !matches!(&statx_result, Ok(DirectIoSupport::Uncertain)) { + return statx_result; + } + } + Ok(check_direct_io_via_open_probe(&file)) +} + +/// Always returns `Ok(Uncertain)`, since direct I/O functionality is not used on non-Linux. +#[cfg(not(target_os = "linux"))] +pub fn check_direct_io_capability(_path: impl AsRef) -> io::Result { + Ok(DirectIoSupport::Uncertain) +} + +/// Check direct I/O capability via `statx(2)` with `STATX_DIOALIGN`. +/// +/// Returns `Ok(Supported)` when `stx_dio_mem_align != 0` (DIO supported), +/// `Ok(Unsupported)` when `STATX_DIOALIGN` is set but DIO is not supported, or +/// `Ok(Uncertain)` when the kernel did not populate `STATX_DIOALIGN` fields (kernel < 6.1). +#[cfg(all(target_os = "linux", not(target_env = "musl")))] +fn check_direct_io_via_statx(path: &Path) -> io::Result { + let path_cstr = CString::new(path.as_os_str().as_encoded_bytes()) + .map_err(|_| io::Error::new(io::ErrorKind::InvalidInput, "path contains a null byte"))?; + + let mut statx_info: libc::statx = unsafe { mem::zeroed() }; + let ret = unsafe { + libc::statx( + libc::AT_FDCWD, + path_cstr.as_ptr(), + // Follow symlinks; don't trigger automounts. + libc::AT_NO_AUTOMOUNT, + libc::STATX_DIOALIGN, + &mut statx_info, + ) + }; + + if ret != 0 { + // statx is available since 4.11, fail completely if it doesn't properly execute + return Err(io::Error::last_os_error()); + } + + // Kernel did not populate DIOALIGN fields - kernels < 6.1 silently ignore unknown mask bits + // and file systems without proper implementation of the check will also ignore it. + let supported = if statx_info.stx_mask & libc::STATX_DIOALIGN == 0 { + DirectIoSupport::Uncertain + } else if statx_info.stx_dio_mem_align != 0 { + // stx_dio_mem_align == 0 means the filesystem does not support DIO. + DirectIoSupport::Supported + } else { + DirectIoSupport::Unsupported + }; + Ok(supported) +} + +/// Check direct I/O capability by attempting to open `file` with `O_DIRECT`. +/// +/// Confirms the file is readable, then tries to reopen it with `O_DIRECT`. +/// Returns `Uncertain` if the file is not readable and no conclusion can be drawn. +#[cfg(target_os = "linux")] +fn check_direct_io_via_open_probe(file: &Path) -> DirectIoSupport { + use std::{fs::OpenOptions, os::unix::fs::OpenOptionsExt as _}; + + // Confirm the file is readable at all; if not, we cannot conclude anything. + if OpenOptions::new().read(true).open(file).is_err() { + return DirectIoSupport::Uncertain; + } + if OpenOptions::new() + .read(true) + .custom_flags(libc::O_DIRECT) + .open(file) + .is_ok() + { + DirectIoSupport::Supported + } else { + DirectIoSupport::Unsupported + } +} + +/// Returns a path to any regular file at or under `path`, recursively traversing +/// directories and returning as soon as one file is found. Returns `Ok(None)` if +/// no file exists under `path`. +#[cfg(target_os = "linux")] +fn find_any_file_under_path(path: &Path) -> io::Result> { + if path.is_file() { + return Ok(Some(path.to_path_buf())); + } + if path.is_dir() { + for entry in fs::read_dir(path)? { + let entry = entry?; + if let Some(path) = find_any_file_under_path(&entry.path())? { + return Ok(Some(path)); + } + } + } + Ok(None) +} + +#[cfg(all(test, target_os = "linux"))] +mod tests { + use {super::*, tempfile::TempDir}; + + fn make_temp_file(dir: &TempDir, name: &str, content: &[u8]) -> std::path::PathBuf { + let path = dir.path().join(name); + std::fs::write(&path, content).unwrap(); + path + } + + #[test] + fn test_find_any_file_under_path_file() { + let dir = TempDir::new().unwrap(); + let file = make_temp_file(&dir, "f.bin", b"hello"); + assert_eq!(find_any_file_under_path(&file).unwrap(), Some(file)); + } + + #[test] + fn test_find_any_file_under_path_dir() { + let dir = TempDir::new().unwrap(); + make_temp_file(&dir, "a.bin", b"data"); + let candidate = find_any_file_under_path(dir.path()).unwrap(); + assert!(candidate.is_some()); + assert!(candidate.unwrap().is_file()); + } + + #[test] + fn test_find_any_file_under_path_empty_dir() { + let dir = TempDir::new().unwrap(); + assert_eq!(find_any_file_under_path(dir.path()).unwrap(), None); + } + + #[test] + fn test_path_supports_direct_io_file() { + let dir = TempDir::new().unwrap(); + let file = make_temp_file(&dir, "probe.bin", &[0u8; 4096]); + let result = check_direct_io_capability(&file).expect("check must not fail"); + assert_eq!( + result, + DirectIoSupport::Supported, + "dev filesystem must support direct I/O" + ); + } + + #[test] + fn test_path_supports_direct_io_dir() { + let dir = TempDir::new().unwrap(); + make_temp_file(&dir, "probe.bin", &[0u8; 4096]); + let result = check_direct_io_capability(dir.path()).expect("check must not fail"); + assert_eq!( + result, + DirectIoSupport::Supported, + "dev filesystem must support direct I/O" + ); + } + + #[test] + fn test_path_supports_direct_io_empty_dir() { + let dir = TempDir::new().unwrap(); + let result = check_direct_io_capability(dir.path()).expect("check must not fail"); + assert_eq!(result, DirectIoSupport::Uncertain); + } + + #[test] + fn test_path_supports_direct_io_nonexistent_path() { + let dir = TempDir::new().unwrap(); + let result = check_direct_io_capability(dir.path().join("does-not-exist")) + .expect("check must not fail"); + assert_eq!(result, DirectIoSupport::Uncertain); + } +} diff --git a/ledger-tool/src/ledger_utils.rs b/ledger-tool/src/ledger_utils.rs index ac09c642714..1ef714c439e 100644 --- a/ledger-tool/src/ledger_utils.rs +++ b/ledger-tool/src/ledger_utils.rs @@ -10,6 +10,7 @@ use { log::*, solana_accounts_db::utils::{ create_all_accounts_run_and_snapshot_dirs, move_and_async_delete_path_contents, + validate_account_paths_for_direct_io, }, solana_clock::Slot, solana_core::validator::{ @@ -106,6 +107,9 @@ pub(crate) enum LoadAndProcessLedgerError { #[error("failed to process blockstore from root: {0}")] ProcessBlockstoreFromRoot(#[source] BlockstoreProcessorError), + + #[error("failed to validate account paths: {0}")] + ValidateAccountPaths(#[source] std::io::Error), } pub fn load_and_process_ledger_or_exit( @@ -253,6 +257,13 @@ pub fn load_and_process_ledger( // From now on, use run/ paths in the same way as the previous account_paths. let account_paths = account_run_paths; + validate_account_paths_for_direct_io( + &process_options.accounts_db_config, + &account_paths, + &account_snapshot_paths, + ) + .map_err(LoadAndProcessLedgerError::ValidateAccountPaths)?; + let (_, measure_clean_account_paths) = measure_time!( account_paths.iter().for_each(|path| { if path.exists() {