Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
55 commits
Select commit Hold shift + click to select a range
c5d9d25
Unpack files from snapshot with io_uring files creator
kskalski Jun 16, 2025
23b8dff
Fix test
kskalski Jun 23, 2025
9ebb4ea
Lint
kskalski Jun 23, 2025
1d394ca
Update dep. Fix lint.
kskalski Jun 23, 2025
be68b2c
Update Cargo.lock
kskalski Jun 23, 2025
62f5f58
Update buf size
kskalski Jun 23, 2025
612df06
Fix buf sizing. Fix warning for macos compilation.
kskalski Jun 23, 2025
34766a2
Better compatibility.
kskalski Jun 23, 2025
f92a79f
Update setting permissions for windows build.
kskalski Jun 24, 2025
f070e54
Update types for windows.
kskalski Jun 24, 2025
d02184a
Fix passing path.
kskalski Jun 24, 2025
1ac790c
Use mode for set_file_readonly call.
kskalski Jun 24, 2025
2ca1479
Remove threads params. Allow overwrite.
kskalski Jun 24, 2025
4521781
Trim down buf size.
kskalski Jun 24, 2025
72791e5
Only send file paths in unpack_archive callback
kskalski Jun 24, 2025
438e2cf
Remove duplicate io_uring_supported fn.
kskalski Jun 26, 2025
4754f94
Revert cleanup change
kskalski Jun 26, 2025
ace219b
Revert cleanup in br.
kskalski Jun 26, 2025
3d81986
Fix merge conflict
kskalski Jul 8, 2025
2d26add
Address some PR comments.
kskalski Jul 8, 2025
b4b9178
Change io_buf_index to Option<u16>.
kskalski Jul 8, 2025
9130fc3
Small renames.
kskalski Jul 8, 2025
be62804
Remove unnecessary clone.
kskalski Jul 8, 2025
959cc7e
Also rename callback in io_uring impl.
kskalski Jul 8, 2025
4e876bc
Address some PR comments.
kskalski Jul 9, 2025
31cfa40
Add memlock limit enforce check.
kskalski Jul 9, 2025
16ef1cc
Reduce mem usage of genesis unpack. Debug unwrap register buf.
kskalski Jul 9, 2025
118bed8
Revert cargo change.
kskalski Jul 9, 2025
bc60d7c
More unwrap debugs.
kskalski Jul 9, 2025
07c0045
Remove .setup_coop_taskrun.
kskalski Jul 9, 2025
1aa77de
Refactor register functions.
kskalski Jul 9, 2025
52e7e73
Cleanup lifetime.
kskalski Jul 9, 2025
26f342a
Fix assert
kskalski Jul 9, 2025
8aa107e
Try setting unlimited memlock.
kskalski Jul 9, 2025
ac6618c
Debug current limit.
kskalski Jul 9, 2025
cff6383
Make step to set memlock limit.
kskalski Jul 9, 2025
c957160
Adjust buf size based on unarchive size and count constraints.
kskalski Jul 9, 2025
ece5288
Revert whitespace
kskalski Jul 9, 2025
c46bf43
Allow buffer size to be non-aligned to write capacity.
kskalski Jul 9, 2025
1007d2d
Addressing PR comments.
kskalski Jul 11, 2025
8f70a55
Fix compile.
kskalski Jul 11, 2025
5eb267f
Extract stats to struct. Cleanups.
kskalski Jul 11, 2025
b1b3355
Add a few comments.
kskalski Jul 11, 2025
b981293
Add unsafe markers and comments.
kskalski Jul 11, 2025
a8c705f
Simpler and less bug-prone logic for sizing file creator buffer.
kskalski Jul 13, 2025
a6de647
Remove squeue::Flags::ASYNC.
kskalski Jul 14, 2025
b4cad55
Update comments. Turn params.
kskalski Jul 14, 2025
ae2b062
Remove fallback for sequential file reader not being created.
kskalski Jul 14, 2025
17ae987
Address PR comments.
kskalski Jul 17, 2025
d34d19b
Clippy pub use order.
kskalski Jul 17, 2025
196bd8f
Elide lifetime.
kskalski Jul 17, 2025
dec2263
Factor out and extend function checking for fallback to tar. Add comm…
kskalski Jul 17, 2025
9cb5aa4
Address some PR comments.
kskalski Jul 17, 2025
9445866
Fix more format strings. Derive Debug for FixedIoBuffer.
kskalski Jul 18, 2025
dc14171
Merge branch 'master' into ks/dev/tar_unpack
kskalski Jul 25, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ Release channels have their own copy of this changelog:
### Validator

#### Breaking
* Require increased `memlock` limits - the recommended setting is `LimitMEMLOCK=2000000000` in the systemd service configuration. An insufficient limit (on Linux) will cause a startup error.
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

* Remove deprecated arguments
* `--accounts-index-memory-limit-mb`
* `--accountsdb-repl-bind-address`, `--accountsdb-repl-port`, `--accountsdb-repl-threads`, `--enable-accountsdb-repl`
Expand All @@ -41,7 +42,6 @@ Release channels have their own copy of this changelog:
* Using `--snapshot-interval-slots 0` to disable generating snapshots has been removed. Use `--no-snapshots` instead.

#### Changes
* Reading snapshot archives requires increased `memlock` limits - the recommended setting is `LimitMEMLOCK=2000000000` in the systemd service configuration. An insufficient limit will result in slower startup times.
* `--transaction-structure view` is now the default.
* The default full snapshot interval is now 100,000 slots.

Expand Down
1 change: 0 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion accounts-db/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,6 @@ solana-measure = { workspace = true }
solana-message = { workspace = true }
solana-metrics = { workspace = true }
solana-nohash-hasher = { workspace = true }
solana-perf = { workspace = true }
solana-pubkey = { workspace = true }
solana-rayon-threadlimit = { workspace = true }
solana-rent = { workspace = true, optional = true }
Expand Down
10 changes: 3 additions & 7 deletions accounts-db/src/buffered_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -286,13 +286,9 @@ pub fn large_file_buf_reader(
if agave_io_uring::io_uring_supported() {
use crate::io_uring::sequential_file_reader::SequentialFileReader;

let io_uring_reader = SequentialFileReader::with_capacity(buf_size, &path);
match io_uring_reader {
Ok(reader) => return Ok(Box::new(reader)),
Err(error) => {
log::warn!("unable to create io_uring reader: {error}");
}
}
return Ok(Box::new(SequentialFileReader::with_capacity(
buf_size, path,
)?));
}
let file = File::open(path)?;
Ok(Box::new(BufReader::with_capacity(buf_size, file)))
Expand Down
171 changes: 168 additions & 3 deletions accounts-db/src/file_io.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
//! File i/o helper functions.
use std::{fs::File, ops::Range};
use std::{
fs::{File, OpenOptions},
io::{self, BufWriter, Write},
ops::Range,
path::PathBuf,
sync::Arc,
};

/// `buffer` contains `valid_bytes` of data at its end.
/// Move those valid bytes to the beginning of `buffer`, then read from `offset` to fill the rest of `buffer`.
Expand Down Expand Up @@ -83,10 +89,105 @@ pub fn read_into_buffer(
Ok(total_bytes_read)
}

/// An asynchronous queue for file creation.
///
/// Files are queued with `schedule_create_at_dir`, each finished file is reported through
/// `file_complete`, and `drain` waits for everything that was scheduled.
///
/// An implementation is allowed to do all of the work eagerly: `SyncIoFileCreator` in this
/// file writes the file and reports completion before `schedule_create_at_dir` returns, and
/// its `drain` has nothing left to wait for.
pub trait FileCreator {
/// Schedule creating a file at `path` with `mode` permissions and bytes read from `contents`.
///
/// `parent_dir_handle` is assumed to be a parent directory of `path` such that file may be
/// created using optimized kernel API to create `path.file_name()` inside `parent_dir_handle`.
///
/// `mode` is a Unix-style permission value. In `SyncIoFileCreator` it is passed to
/// `OpenOptionsExt::mode` on Unix; on other platforms only the owner-write bit (`0o200`)
/// is consulted, to decide whether the file is marked read-only.
///
/// # Errors
///
/// Returns an `io::Error` when the operation cannot be scheduled. For implementations that
/// do the work eagerly this also covers failures to open or write the file.
fn schedule_create_at_dir(
&mut self,
path: PathBuf,
mode: u32,
parent_dir_handle: Arc<File>,
contents: &mut dyn io::Read,
) -> io::Result<()>;

/// Invoke implementation specific logic to handle file creation completion.
///
/// `path` is the path of the file that has been created. `SyncIoFileCreator` forwards it to
/// the callback that was supplied when the creator was constructed.
fn file_complete(&mut self, path: PathBuf);

/// Waits for all operations to be completed
///
/// # Errors
///
/// Returns an `io::Error` if waiting for the scheduled operations fails.
// NOTE(review): presumably this also surfaces errors of deferred writes in asynchronous
// implementations - confirm against the io_uring implementation, which is not visible here.
fn drain(&mut self) -> io::Result<()>;
}

pub fn file_creator<'a>(
buf_size: usize,
file_complete: impl FnMut(PathBuf) + 'a,
) -> io::Result<Box<dyn FileCreator + 'a>> {
#[cfg(target_os = "linux")]
if agave_io_uring::io_uring_supported() {
Comment thread
alessandrod marked this conversation as resolved.
Outdated
use crate::io_uring::file_creator::IoUringFileCreator;

let io_uring_creator = IoUringFileCreator::with_buffer_capacity(buf_size, file_complete)?;
return Ok(Box::new(io_uring_creator));
Comment on lines 121 to 122
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Most of the other io_uring impls fall back to the slow/backup impl if the io_uring construction fails. I see we're not doing that here. Not saying this is wrong, just calling it out as different.

Is this the correct/desired behavior?

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah, this is basically the direction that alessandro insisted on, and I agree this is the right call at this point:

  • we only support linux in prod and only kernels that do support io_uring, so having a single code path that is used always is just simpler and cleaner
  • the fallback would have increasingly terrible performance at prod scale - it was kind of acceptable for the reader thread, but since we are now doing both reading + decompression and unpacking on a single thread, it would be slow enough that it's better to just panic and insist that the operator configure the system properly
  • in fact once I enabled this and made it be used on all CI workflows, it helped me to narrow some bugs/deficiencies and to become more assured it works well (with silent fallback to different impl you never know how much you are testing actual code you want to be used)
  • we are still planning to expand io_uring usage, so all above points will just be strengthened going forward

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the reply. I'm on board with this approach.

}
Ok(Box::new(SyncIoFileCreator::new(buf_size, file_complete)))
}

/// File creator that relies on blocking standard-library file I/O.
///
/// The only state it keeps is the boxed completion callback, which receives the path of
/// every file that has been created.
pub struct SyncIoFileCreator<'a> {
    file_complete: Box<dyn FnMut(PathBuf) + 'a>,
}

impl<'a> SyncIoFileCreator<'a> {
    /// Builds a creator that reports finished files through `file_complete`.
    ///
    /// `_buf_size` is accepted for signature parity with other creators and is not used.
    fn new(_buf_size: usize, file_complete: impl FnMut(PathBuf) + 'a) -> Self {
        let file_complete: Box<dyn FnMut(PathBuf) + 'a> = Box::new(file_complete);
        Self { file_complete }
    }
}

/// Sets or clears the read-only permission flag of the file at `path`.
///
/// Only compiled on non-Unix targets. Any error from reading the metadata or from applying
/// the updated permissions is returned to the caller.
#[cfg(not(unix))]
pub(super) fn set_file_readonly(path: &std::path::Path, readonly: bool) -> io::Result<()> {
    let metadata = std::fs::metadata(path)?;
    let mut permissions = metadata.permissions();
    permissions.set_readonly(readonly);
    std::fs::set_permissions(path, permissions)
}

impl FileCreator for SyncIoFileCreator<'_> {
    /// Creates (or overwrites) the file at `path`, copies all of `contents` into it and
    /// reports completion before returning.
    ///
    /// `_parent_dir_handle` is not used by this implementation. On Unix `mode` is applied
    /// through the open options; elsewhere the file is marked read-only when the
    /// owner-write bit (`0o200`) of `mode` is not set.
    fn schedule_create_at_dir(
        &mut self,
        path: PathBuf,
        mode: u32,
        _parent_dir_handle: Arc<File>,
        contents: &mut dyn io::Read,
    ) -> io::Result<()> {
        // Truncating an existing file is what makes overwriting possible.
        let mut open_options = OpenOptions::new();
        open_options.write(true).create(true).truncate(true);

        #[cfg(unix)]
        {
            use std::os::unix::fs::OpenOptionsExt;
            open_options.mode(mode);
        }

        let mut writer = BufWriter::new(open_options.open(&path)?);
        io::copy(contents, &mut writer)?;
        // Flush explicitly so that write errors are returned instead of being lost on drop.
        writer.flush()?;

        #[cfg(not(unix))]
        set_file_readonly(&path, mode & 0o200 == 0)?;

        self.file_complete(path);
        Ok(())
    }

    /// Hands `path` to the completion callback stored in this creator.
    fn file_complete(&mut self, path: PathBuf) {
        let callback = &mut self.file_complete;
        callback(path)
    }

    /// Nothing is ever left pending by this implementation, so this always succeeds.
    fn drain(&mut self) -> io::Result<()> {
        Ok(())
    }
}

#[cfg(test)]
mod tests {

use {super::*, std::io::Write, tempfile::tempfile};
use {
super::*,
std::{
fs,
io::{Cursor, Write},
},
tempfile::tempfile,
};

#[test]
fn test_read_into_buffer() {
Expand Down Expand Up @@ -193,4 +294,68 @@ mod tests {
bytes[start_offset..file_size]
);
}

/// Reads the whole file at `path` and returns its contents as a `String`.
///
/// Takes a borrowed `Path` rather than `&PathBuf`; existing callers that pass `&PathBuf`
/// keep working through deref coercion.
///
/// # Panics
///
/// Panics if the file cannot be read or if its contents are not valid UTF-8.
fn read_file_to_string(path: &std::path::Path) -> String {
    let bytes = fs::read(path).expect("Failed to read file");
    String::from_utf8(bytes).expect("Failed to decode file contents")
}

/// A single scheduled file ends up on disk with the expected contents, and its path is
/// handed to the completion callback.
#[test]
fn test_create_writes_contents() -> io::Result<()> {
    let expected = "Hello, world!";
    let temp_dir = tempfile::tempdir()?;
    let target = temp_dir.path().join("test.txt");

    // Remembers the path passed to the completion callback, if it was called.
    let mut completed = None;
    let mut creator = file_creator(2 << 20, |path| {
        completed = Some(path);
    })?;

    let parent = Arc::new(File::open(temp_dir.path())?);
    let mut reader = Cursor::new(expected);
    creator.schedule_create_at_dir(target.clone(), 0o644, parent, &mut reader)?;
    creator.drain()?;
    // Drop the creator so the callback's borrow of `completed` ends before it is read.
    drop(creator);

    assert_eq!(read_file_to_string(&target), expected);
    assert_eq!(completed, Some(target));

    Ok(())
}

/// Scheduling several files invokes the completion callback once per file, and each file
/// is readable with the expected prefix by the time its callback runs.
#[test]
fn test_multiple_file_creations() -> io::Result<()> {
    let temp_dir = tempfile::tempdir()?;
    let mut completed_count = 0;

    let mut creator = file_creator(2 << 20, |path: PathBuf| {
        assert!(read_file_to_string(&path).starts_with("File "));
        completed_count += 1;
    })?;

    let parent = Arc::new(File::open(temp_dir.path())?);
    (0..5).try_for_each(|i| {
        creator.schedule_create_at_dir(
            temp_dir.path().join(format!("file_{i}.txt")),
            0o600,
            Arc::clone(&parent),
            &mut Cursor::new(format!("File {i}")),
        )
    })?;
    creator.drain()?;
    // Drop the creator so the callback's borrow of `completed_count` ends before it is read.
    drop(creator);

    assert_eq!(completed_count, 5);
    Ok(())
}
}
Loading
Loading