Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions apps/oxlint/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ rayon = { workspace = true }
rustc-hash = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
simdutf8 = { workspace = true, optional = true }
tempfile = { workspace = true }
tracing-subscriber = { workspace = true, features = [] } # Omit the `regex` feature

Expand All @@ -60,6 +61,6 @@ lazy-regex = { workspace = true }
[features]
default = []
allocator = ["dep:mimalloc-safe"]
oxlint2 = ["oxc_linter/oxlint2"]
disable_oxlint2 = ["oxc_linter/disable_oxlint2"]
oxlint2 = ["oxc_linter/oxlint2", "oxc_allocator/fixed_size", "dep:simdutf8"]
disable_oxlint2 = ["oxc_linter/disable_oxlint2", "oxc_allocator/disable_fixed_size"]
force_test_reporter = ["oxc_linter/force_test_reporter"]
3 changes: 3 additions & 0 deletions apps/oxlint/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ pub use oxc_linter::{
ExternalLinter, ExternalLinterCb, ExternalLinterLoadPluginCb, PluginLoadResult,
};

// `raw_fs` provides `RawTransferFileSystem`. It is only compiled when the `oxlint2` feature
// is enabled and the `disable_oxlint2` feature is not.
#[cfg(all(feature = "oxlint2", not(feature = "disable_oxlint2")))]
mod raw_fs;

// Install mimalloc as the global allocator when the `allocator` feature is enabled,
// except when running under Miri or targeting the wasm family.
#[cfg(all(feature = "allocator", not(miri), not(target_family = "wasm")))]
#[global_allocator]
static GLOBAL: mimalloc_safe::MiMalloc = mimalloc_safe::MiMalloc;
Expand Down
9 changes: 9 additions & 0 deletions apps/oxlint/src/lint.rs
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,15 @@ impl Runner for LintRunner {
// Spawn linting in another thread so diagnostics can be printed immediately from diagnostic_service.run.
rayon::spawn(move || {
let mut lint_service = LintService::new(&linter, allocator_pool, options);

// Use `RawTransferFileSystem` if `oxlint2` feature is enabled.
// This reads the source text into start of allocator, instead of the end.
#[cfg(all(feature = "oxlint2", not(feature = "disable_oxlint2")))]
{
use crate::raw_fs::RawTransferFileSystem;
lint_service = lint_service.with_file_system(Box::new(RawTransferFileSystem));
}

lint_service.run(&tx_error);
});

Expand Down
198 changes: 198 additions & 0 deletions apps/oxlint/src/raw_fs.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
use std::{
fs::{self, File},
io::{self, Read},
mem::ManuallyDrop,
path::Path,
ptr, slice,
};

use oxc_allocator::Allocator;
use oxc_linter::RuntimeFileSystem;

/// File system used by `oxlint2`.
///
/// Identical to `OsFileSystem`, except that `read_to_arena_str` reads the file's contents into
/// the start of the allocator, instead of the end. This conforms to what raw transfer needs.
///
/// Must only be used in conjunction with `AllocatorPool` with the `fixed_size` feature enabled,
/// which wraps `Allocator`s with a custom `Drop` impl. That custom `Drop` impl is what makes
/// `read_to_arena_str` safe.
///
/// This is a temporary solution. When we replace `bumpalo` with our own allocator, all strings
/// will be written at the start of the arena, so then `OsFileSystem` will work fine, and we can
/// remove `RawTransferFileSystem`. TODO: Do that!
pub struct RawTransferFileSystem;

impl RuntimeFileSystem for RawTransferFileSystem {
/// Read file from disk into start of `allocator`.
///
/// # SAFETY
/// `allocator` must not be dropped after calling this method.
/// See [`Allocator::alloc_bytes_start`] for more details.
///
/// This should be an unsafe method, but can't because we're implementing a safe trait method.
fn read_to_arena_str<'a>(
&self,
path: &Path,
allocator: &'a Allocator,
) -> Result<&'a str, std::io::Error> {
// SAFETY: Caller promises not to allow `allocator` to be dropped
unsafe { read_to_arena_str(path, allocator) }
}

fn write_file(&self, path: &Path, content: &str) -> Result<(), std::io::Error> {
fs::write(path, content)
}
}

/// Load a UTF-8 encoded file straight into the arena, placing its bytes at the *start*
/// of the arena rather than the end.
///
/// The file at `path` is opened and its metadata queried. When metadata is available,
/// the reported length is used to read directly into arena memory without an intermediate
/// buffer. When metadata cannot be obtained, the contents are first read into a temporary
/// `Vec` and then copied into the arena.
///
/// Once the bytes are in the arena they are validated as UTF-8, and a `&str` borrowing
/// the arena-backed data is returned.
///
/// # Parameters
///
/// - `path`: Location of the file to read.
/// - `allocator`: The [`Allocator`] which will hold the file's contents.
///
/// # Errors
///
/// Returns [`io::Error`] if any of:
///
/// - The file cannot be read.
/// - The file's contents are not valid UTF-8.
/// - The file's size exceeds the capacity of `allocator`.
///
/// # SAFETY
/// `allocator` must not be dropped after calling this method.
/// See [`Allocator::alloc_bytes_start`] for more details.
unsafe fn read_to_arena_str<'alloc>(
    path: &Path,
    allocator: &'alloc Allocator,
) -> io::Result<&'alloc str> {
    let file = File::open(path)?;

    // Pick the read strategy depending on whether the file's size can be determined up front
    let read_result = match file.metadata() {
        // SAFETY: Caller guarantees `allocator` is not dropped after calling this method
        Ok(metadata) => unsafe { read_to_arena_bytes_known_size(file, metadata.len(), allocator) },
        // SAFETY: Caller guarantees `allocator` is not dropped after calling this method
        Err(_) => unsafe { read_to_arena_bytes_unknown_size(file, allocator) },
    };
    let bytes = read_result?;

    // Validate the bytes as UTF-8 and hand them back as a `&str`
    match simdutf8::basic::from_utf8(bytes) {
        Ok(text) => Ok(text),
        Err(_) => {
            Err(io::Error::new(io::ErrorKind::InvalidData, "stream did not contain valid UTF-8"))
        }
    }
}

/// Read contents of file directly into arena, without an intermediate buffer.
///
/// Allocates `size` bytes at the start of the arena and reads at most `size` bytes from `file`
/// into that allocation. The returned slice covers only the bytes actually read, which may be
/// fewer than `size` if the file turned out to be shorter than reported.
///
/// # Parameters
///
/// - `file`: Open file to read from. It is consumed by this function.
/// - `size`: Expected size of the file in bytes (e.g. as reported by file metadata).
/// - `allocator`: The [`Allocator`] to read the file's contents into.
///
/// # Errors
///
/// Returns [`io::Error`] if:
///
/// - `size` does not fit in a `usize` (`io::ErrorKind::InvalidData`).
/// - Reading from `file` fails.
///
/// # SAFETY
/// `allocator` must not be dropped after calling this method.
/// See [`Allocator::alloc_bytes_start`] for more details.
unsafe fn read_to_arena_bytes_known_size(
file: File,
size: u64,
allocator: &Allocator,
) -> io::Result<&[u8]> {
// Check file is not larger than `usize::MAX` bytes.
// Note: We don't need to check `size` is not larger than `isize::MAX` bytes here,
// because `Allocator::alloc_bytes_start` does a size check.
// From here on, `size` is shadowed as a mutable `usize`, so it can be adjusted after reading.
let Ok(mut size) = usize::try_from(size) else {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
"File is larger than `usize::MAX` bytes",
));
};

// Allocate space for string in allocator.
// SAFETY: Caller guarantees `allocator` is not dropped after calling this method.
let ptr = unsafe { allocator.alloc_bytes_start(size) };

// Read contents of file into allocated space.
//
// * Create a `Vec` which pretends to own the allocation we just created in arena.
// * Wrap the `Vec` in `ManuallyDrop`, so it doesn't free the memory at end of the block,
// or if there's a panic during reading.
// * Use `File::take` to obtain a reader which yields no more than `size` bytes.
// This ensures it can't produce more data than we allocated space for - in case file increased
// in size since the call to `file.metadata()`, or `file.metadata()` returned inaccurate size.
// * Use `Read::read_to_end` to fill the `Vec` from this reader.
//
// This is a hack. It's completely bananas that Rust doesn't provide a method to write into
// a slice of uninitialized bytes, but this seems to be the only safe way to do it on stable Rust.
// https://users.rust-lang.org/t/reading-c-style-structures-from-disk/70529/7
//
// I (@overlookmotel) have reviewed the code for `Read::read_to_end` and it will never grow the `Vec`,
// as long as it has sufficient capacity for the reader's contents to start with.
// If it did, that would be UB as it would free the chunk of memory backing the `Vec`,
// which it doesn't actually own.
//
// Unfortunately `Read::read_to_end`'s docs don't guarantee this behavior. But the code is written
// specifically to avoid growing the `Vec`, and there was a PR to make sure it doesn't:
// https://github.com/rust-lang/rust/pull/89165
// So I think in practice we can rely on this behavior.
{
// SAFETY: We've allocated `size` bytes starting at `ptr`.
// This `Vec` doesn't actually own that memory, but we immediately wrap it in `ManuallyDrop`,
// so it won't free the memory on drop. As long as the `Vec` doesn't grow, no UB (see above).
let vec = unsafe { Vec::from_raw_parts(ptr.as_ptr(), 0, size) };
let mut vec = ManuallyDrop::new(vec);
// `size as u64` cannot lose information here: this `usize` value was itself converted
// from a `u64` above.
let bytes_written = file.take(size as u64).read_to_end(&mut vec)?;

// Sanity checks (debug builds only): the `Vec` must not have been reallocated,
// and its length must match the number of bytes `read_to_end` reported.
debug_assert!(vec.capacity() == size);
debug_assert!(vec.len() == bytes_written);

// Update `size`, in case file was altered and got smaller since the call to `file.metadata()`,
// or `file.metadata()` reported inaccurate size
size = vec.len();
}

// SAFETY: `size` bytes were written starting at `ptr`.
// Those bytes will remain untouched until the `Allocator` is reset, so returning a `&[u8]` with
// same lifetime as the `&Allocator` borrow is valid.
let bytes = unsafe { slice::from_raw_parts(ptr.as_ptr(), size) };
Ok(bytes)
}

/// Slow path, used when the file's size cannot be determined in advance.
///
/// The whole file is first read into a temporary heap-allocated `Vec`, and those bytes
/// are then copied into a fresh allocation at the start of the arena.
///
/// # Errors
///
/// Returns [`io::Error`] if reading from `file` fails.
///
/// # SAFETY
/// `allocator` must not be dropped after calling this method.
/// See [`Allocator::alloc_bytes_start`] for more details.
unsafe fn read_to_arena_bytes_unknown_size(
    mut file: File,
    allocator: &Allocator,
) -> io::Result<&[u8]> {
    // Slurp the entire file into a temporary buffer on the heap
    let mut buffer = Vec::new();
    file.read_to_end(&mut buffer)?;

    let src = buffer.as_ptr();
    let len = buffer.len();

    // Reserve `len` bytes at the start of the arena chunk.
    // SAFETY: Caller guarantees `allocator` is not dropped after calling this method.
    let dst = unsafe { allocator.alloc_bytes_start(len) }.as_ptr();

    // Copy the buffered bytes into the arena.
    // SAFETY: `alloc_bytes_start` allocated space for `len` bytes at start of the arena chunk.
    // That allocation cannot overlap the allocation owned by `buffer`.
    unsafe { ptr::copy_nonoverlapping(src, dst, len) };

    // SAFETY: After the `copy_nonoverlapping` call, `len` bytes starting from `dst` are initialized,
    // so it is safe to create a byte slice referencing those bytes.
    // Those bytes will remain untouched until the `Allocator` is reset, so returning a `&[u8]` with
    // same lifetime as the `&Allocator` borrow is valid.
    let arena_bytes = unsafe { slice::from_raw_parts(dst, len) };

    Ok(arena_bytes)
}
2 changes: 1 addition & 1 deletion crates/oxc_allocator/src/fixed_size.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ const FOUR_GIB: usize = 1 << (IS_SUPPORTED_PLATFORM as usize * 32);
const ALLOC_SIZE: usize = FOUR_GIB;
const ALLOC_ALIGN: usize = TWO_GIB;
const CHUNK_SIZE: usize = TWO_GIB;
const CHUNK_ALIGN: usize = FOUR_GIB;
pub const CHUNK_ALIGN: usize = FOUR_GIB;

const ALLOC_LAYOUT: Layout = match Layout::from_size_align(ALLOC_SIZE, ALLOC_ALIGN) {
Ok(layout) => layout,
Expand Down
17 changes: 16 additions & 1 deletion crates/oxc_allocator/src/pool.rs
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,10 @@ mod wrapper {

#[cfg(all(feature = "fixed_size", not(feature = "disable_fixed_size")))]
mod wrapper {
use crate::{Allocator, fixed_size::FixedSizeAllocator};
use crate::{
Allocator,
fixed_size::{CHUNK_ALIGN, FixedSizeAllocator},
};

/// Structure which wraps an [`Allocator`] with fixed size of 2 GiB, and aligned on 4 GiB.
///
Expand All @@ -127,7 +130,19 @@ mod wrapper {

/// Reset the [`Allocator`] in this [`AllocatorWrapper`].
///
/// Resets the wrapped allocator, then restores its data pointer by rounding it down
/// to a multiple of `CHUNK_ALIGN`.
pub fn reset(&mut self) {
    // Move the cursor back to the end
    self.0.reset();

    // Move the data pointer back to the start.
    // SAFETY: Fixed-size allocators have data pointer originally aligned on `CHUNK_ALIGN`,
    // and size less than `CHUNK_ALIGN`. So the original data pointer can be recovered by
    // rounding the current one down to the nearest multiple of `CHUNK_ALIGN`.
    unsafe {
        let current = self.0.data_ptr();
        let bytes_past_start = current.as_ptr() as usize % CHUNK_ALIGN;
        self.0.set_data_ptr(current.sub(bytes_past_start));
    }
}

/// Create a `Vec` of [`AllocatorWrapper`]s.
Expand Down
Loading