-
-
Notifications
You must be signed in to change notification settings - Fork 3k
fs: support io_uring with tokio::fs::read
#7696
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
7 commits
Select commit
Hold shift + click to select a range
7cb36cf
io-uring: Implement tokio::fs::read
Daksh14 137b0c1
io-uring: re-think fs read with std lib's implementation
Daksh14 73735c9
io-uring: Use stack method to prevent realloc in case of 0 size read
Daksh14 051aa82
io-uring: Fix check and reduce MAX_READ_SIZE to 64 blocks
Daksh14 ad39b92
io-uring: Create `op_read` helper function
Daksh14 233226c
io-uring: Don't return `size_read` but boolean for EOF check
Daksh14 fb23b82
Fix typos
Daksh14 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Some comments aren't visible on the classic Files Changed page.
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,134 @@ | ||
| use crate::fs::OpenOptions; | ||
| use crate::runtime::driver::op::Op; | ||
|
|
||
| use std::io; | ||
| use std::io::ErrorKind; | ||
| use std::os::fd::OwnedFd; | ||
| use std::path::Path; | ||
|
|
||
// This algorithm is inspired by the Rust standard library, version 1.90.0:
// https://doc.rust-lang.org/1.90.0/src/std/io/mod.rs.html#409

/// Number of bytes requested by the small "probe" read, and the amount of
/// extra capacity reserved whenever the buffer is full.
const PROBE_SIZE: usize = 32;
/// [`PROBE_SIZE`] expressed as the `u32` length type used for read requests.
const PROBE_SIZE_U32: u32 = PROBE_SIZE as u32;

/// Maximum number of bytes requested by a single io_uring read submission.
///
/// Set to 64 MiB. The value must never exceed `u32::MAX`, because the length
/// of every read is converted to `u32` before it is submitted.
const MAX_READ_SIZE: usize = 64 * 1024 * 1024;

// Enforce the `u32` invariants at compile time so that a future change to the
// constants above fails the build instead of truncating or panicking at runtime.
const _: () = assert!(PROBE_SIZE as u64 <= u32::MAX as u64);
const _: () = assert!(MAX_READ_SIZE as u64 <= u32::MAX as u64);
|
|
||
| pub(crate) async fn read_uring(path: &Path) -> io::Result<Vec<u8>> { | ||
| let file = OpenOptions::new().read(true).open(path).await?; | ||
|
|
||
| // TODO: use io uring in the future to obtain metadata | ||
| let size_hint: Option<usize> = file.metadata().await.map(|m| m.len() as usize).ok(); | ||
|
|
||
| let fd: OwnedFd = file | ||
| .try_into_std() | ||
| .expect("unexpected in-flight operation detected") | ||
| .into(); | ||
|
|
||
| let mut buf = Vec::new(); | ||
|
|
||
| if let Some(size_hint) = size_hint { | ||
| buf.try_reserve(size_hint)?; | ||
| } | ||
|
|
||
| read_to_end_uring(fd, buf).await | ||
| } | ||
|
|
||
| async fn read_to_end_uring(mut fd: OwnedFd, mut buf: Vec<u8>) -> io::Result<Vec<u8>> { | ||
|
ADD-SP marked this conversation as resolved.
|
||
| let mut offset = 0; | ||
| let start_cap = buf.capacity(); | ||
|
|
||
| loop { | ||
| if buf.len() == buf.capacity() && buf.capacity() == start_cap && buf.len() >= PROBE_SIZE { | ||
| // The buffer might be an exact fit. Let's read into a probe buffer | ||
| // and see if it returns `Ok(0)`. If so, we've avoided an | ||
| // unnecessary increasing of the capacity. But if not, append the | ||
| // probe buffer to the primary buffer and let its capacity grow. | ||
| let (r_fd, r_buf, is_eof) = small_probe_read(fd, buf, &mut offset).await?; | ||
|
|
||
| if is_eof { | ||
| return Ok(r_buf); | ||
| } | ||
|
|
||
| buf = r_buf; | ||
| fd = r_fd; | ||
| } | ||
|
|
||
| // buf is full, need more capacity | ||
| if buf.len() == buf.capacity() { | ||
| buf.try_reserve(PROBE_SIZE)?; | ||
| } | ||
|
|
||
| // prepare the spare capacity to be read into | ||
| let buf_len = usize::min(buf.spare_capacity_mut().len(), MAX_READ_SIZE); | ||
|
|
||
| // buf_len cannot be greater than u32::MAX because MAX_READ_SIZE | ||
| // is less than u32::MAX | ||
| let read_len = u32::try_from(buf_len).expect("buf_len must always fit in u32"); | ||
|
|
||
| // read into spare capacity | ||
| let (r_fd, r_buf, is_eof) = op_read(fd, buf, &mut offset, read_len).await?; | ||
|
|
||
| if is_eof { | ||
| return Ok(r_buf); | ||
| } | ||
|
|
||
| fd = r_fd; | ||
| buf = r_buf; | ||
| } | ||
| } | ||
|
|
||
| async fn small_probe_read( | ||
| fd: OwnedFd, | ||
| mut buf: Vec<u8>, | ||
| offset: &mut u64, | ||
| ) -> io::Result<(OwnedFd, Vec<u8>, bool)> { | ||
| let read_len = PROBE_SIZE_U32; | ||
|
|
||
| let mut temp_arr = [0; PROBE_SIZE]; | ||
| // we don't call this function if buffer's length < PROBE_SIZE | ||
| let back_bytes_len = buf.len() - PROBE_SIZE; | ||
|
|
||
| temp_arr.copy_from_slice(&buf[back_bytes_len..]); | ||
|
|
||
| // We're decreasing the length of the buffer and len is greater | ||
| // than PROBE_SIZE. So we can read into the discarded length | ||
| buf.truncate(back_bytes_len); | ||
|
|
||
| let (r_fd, mut r_buf, is_eof) = op_read(fd, buf, offset, read_len).await?; | ||
| // If `size_read` returns zero due to reasons such as buffer's exact fit, | ||
| // then this `try_reserve` does not perform allocation. | ||
| r_buf.try_reserve(PROBE_SIZE)?; | ||
|
Daksh14 marked this conversation as resolved.
|
||
| r_buf.splice(back_bytes_len..back_bytes_len, temp_arr); | ||
|
Darksonn marked this conversation as resolved.
ADD-SP marked this conversation as resolved.
|
||
|
|
||
| Ok((r_fd, r_buf, is_eof)) | ||
| } | ||
|
Daksh14 marked this conversation as resolved.
|
||
|
|
||
| // Takes a amount of length to read and returns a singluar read in the buffer | ||
| // | ||
| // Returns the file descriptor, buffer and EOF reached or not | ||
| async fn op_read( | ||
| mut fd: OwnedFd, | ||
| mut buf: Vec<u8>, | ||
| offset: &mut u64, | ||
| read_len: u32, | ||
| ) -> io::Result<(OwnedFd, Vec<u8>, bool)> { | ||
| loop { | ||
| let (res, r_fd, r_buf) = Op::read(fd, buf, read_len, *offset).await; | ||
|
|
||
| match res { | ||
| Err(e) if e.kind() == ErrorKind::Interrupted => { | ||
| buf = r_buf; | ||
| fd = r_fd; | ||
| } | ||
| Err(e) => return Err(e), | ||
| Ok(size_read) => { | ||
| *offset += size_read as u64; | ||
|
|
||
| return Ok((r_fd, r_buf, size_read == 0)); | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
ADD-SP marked this conversation as resolved.
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,3 +1,4 @@ | ||
| pub(crate) mod open; | ||
| pub(crate) mod read; | ||
| pub(crate) mod utils; | ||
| pub(crate) mod write; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,61 @@ | ||
| use crate::runtime::driver::op::{CancelData, Cancellable, Completable, CqeResult, Op}; | ||
|
|
||
| use io_uring::{opcode, types}; | ||
| use std::io::{self, Error}; | ||
| use std::os::fd::{AsRawFd, OwnedFd}; | ||
|
|
||
| #[derive(Debug)] | ||
| pub(crate) struct Read { | ||
| fd: OwnedFd, | ||
| buf: Vec<u8>, | ||
| } | ||
|
|
||
| impl Completable for Read { | ||
| type Output = (io::Result<u32>, OwnedFd, Vec<u8>); | ||
|
|
||
| fn complete(self, cqe: CqeResult) -> Self::Output { | ||
| let mut buf = self.buf; | ||
|
|
||
| if let Ok(len) = cqe.result { | ||
| let new_len = buf.len() + len as usize; | ||
| // SAFETY: Kernel read len bytes | ||
| unsafe { buf.set_len(new_len) }; | ||
| } | ||
|
|
||
| (cqe.result, self.fd, buf) | ||
| } | ||
|
Daksh14 marked this conversation as resolved.
|
||
|
|
||
| fn complete_with_error(self, err: Error) -> Self::Output { | ||
| (Err(err), self.fd, self.buf) | ||
| } | ||
| } | ||
|
|
||
| impl Cancellable for Read { | ||
| fn cancel(self) -> CancelData { | ||
| CancelData::Read(self) | ||
| } | ||
| } | ||
|
|
||
| impl Op<Read> { | ||
| // Submit a request to read a FD at given length and offset into a | ||
| // dynamic buffer with uninitialized memory. The read happens on uninitialized | ||
| // buffer and no overwriting happens. | ||
|
|
||
| // SAFETY: The `len` of the amount to be read and the buffer that is passed | ||
| // should have capacity > len. | ||
| // | ||
| // If `len` read is higher than vector capacity then setting its length by | ||
| // the caller in terms of size_read can be unsound. | ||
| pub(crate) fn read(fd: OwnedFd, mut buf: Vec<u8>, len: u32, offset: u64) -> Self { | ||
|
Daksh14 marked this conversation as resolved.
|
||
| // don't overwrite on already written part | ||
| assert!(buf.spare_capacity_mut().len() >= len as usize); | ||
| let buf_mut_ptr = buf.spare_capacity_mut().as_mut_ptr().cast(); | ||
|
|
||
| let read_op = opcode::Read::new(types::Fd(fd.as_raw_fd()), buf_mut_ptr, len) | ||
| .offset(offset) | ||
| .build(); | ||
|
|
||
| // SAFETY: Parameters are valid for the entire duration of the operation | ||
| unsafe { Op::new(read_op, Read { fd, buf }) } | ||
| } | ||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.