diff --git a/crates/oxc_allocator/src/pool_fixed_size.rs b/crates/oxc_allocator/src/pool_fixed_size.rs index 78a2892534d67..6a2df7fb9aa93 100644 --- a/crates/oxc_allocator/src/pool_fixed_size.rs +++ b/crates/oxc_allocator/src/pool_fixed_size.rs @@ -1,6 +1,7 @@ use std::{ alloc::{self, GlobalAlloc, Layout, System}, mem::ManuallyDrop, + num::NonZeroUsize, ops::Deref, ptr::NonNull, sync::{ @@ -19,23 +20,95 @@ use crate::{ const TWO_GIB: usize = 1 << 31; const FOUR_GIB: usize = 1 << 32; +/// Maximum number of threads supported. +/// +/// This is the same value that Rayon uses. +/// +/// +/// We don't use `rayon::max_num_threads()` to get this value because Rayon's docs state that +/// "value may vary between different targets, and is subject to change in new Rayon versions". +/// +/// +/// We need an absolute guarantee that maximum `WorkerFlagsCount` is less than `TWO_GIB` minus size of +/// all metadata, so worker thread flags definitely fit in the allocator chunk. So we hard-code it. +/// +/// If Rayon decreases its max thread count in future, it's fine - thread count will never reach our limit. +/// If Rayon increases its max thread count, that's also not too problematic - we just impose a lower limit +/// than Rayon, and panic if that limit is exceeded. We can raise the limit if needed. +/// +/// This is all academic anyway. It seems unlikely any machine running Oxc will have anywhere near +/// 65535 CPU cores! +const MAX_THREADS: usize = (1 << 16) - 1; // 65535 + +mod worker_flags_count { + use super::*; + + /// Number of bytes for worker thread flags in a [`FixedSizeAllocator`]. + /// + /// Wrapper around `NonZeroUsize` which limits value to between 16 and `MAX_THREADS + 1`, + /// and a multiple of 16. + #[derive(Clone, Copy)] + #[repr(transparent)] + pub struct WorkerFlagsCount { + count: NonZeroUsize, + } + + impl WorkerFlagsCount { + /// Create a new [`WorkerFlagsCount`] for the specified number of threads. 
+ /// + /// # Panics + /// Panics if `thread_count` is 0 or exceeds `MAX_THREADS`. + #[inline] + pub fn new(thread_count: usize) -> Self { + assert!(thread_count > 0, "Thread count cannot be 0"); + assert!(thread_count <= MAX_THREADS, "Thread count cannot exceed {MAX_THREADS}"); + + // Worker flags are stored at end of allocator chunks, just before `ChunkFooter`. + // Using a multiple of 16 may make it a little faster to initialize the flags bytes. + let count = thread_count.next_multiple_of(16); + // SAFETY: We checked that `count > 0` above + let count = unsafe { NonZeroUsize::new_unchecked(count) }; + WorkerFlagsCount { count } + } + + /// Get the worker flags count. + #[inline] + pub fn get(self) -> usize { + self.count.get() + } + } +} +use worker_flags_count::WorkerFlagsCount; + /// A thread-safe pool for reusing [`Allocator`] instances to reduce allocation overhead. /// /// Internally uses a `Vec` protected by a `Mutex` to store available allocators. pub struct AllocatorPool { /// Allocators in the pool allocators: Mutex>, + /// Number of bytes reserved for flags indicating which JS worker threads have been sent an allocator + worker_flags_count: WorkerFlagsCount, /// ID to assign to next `Allocator` that's created next_id: AtomicU32, } impl AllocatorPool { /// Creates a new [`AllocatorPool`] for use across the specified number of threads. + /// + /// # Panics + /// Panics if `thread_count` is 0 or exceeds `MAX_THREADS`. pub fn new(thread_count: usize) -> AllocatorPool { + let worker_flags_count = WorkerFlagsCount::new(thread_count); + // Each allocator consumes a large block of memory, so create them on demand instead of upfront, // in case not all threads end up being used (e.g. 
language server without `import` plugin) let allocators = Vec::with_capacity(thread_count); - AllocatorPool { allocators: Mutex::new(allocators), next_id: AtomicU32::new(0) } + + AllocatorPool { + allocators: Mutex::new(allocators), + worker_flags_count, + next_id: AtomicU32::new(0), + } } /// Retrieves an [`Allocator`] from the pool, or creates a new one if the pool is empty. @@ -58,7 +131,7 @@ impl AllocatorPool { // Protect against IDs wrapping around. // TODO: Does this work? Do we need it anyway? assert!(id < u32::MAX, "Created too many allocators"); - FixedSizeAllocator::new(id) + FixedSizeAllocator::new(id, self.worker_flags_count) }); AllocatorGuard { allocator: ManuallyDrop::new(allocator), pool: self } @@ -98,7 +171,7 @@ impl Drop for AllocatorGuard<'_> { fn drop(&mut self) { // SAFETY: After taking ownership of the `FixedSizeAllocator`, we do not touch the `ManuallyDrop` again let mut allocator = unsafe { ManuallyDrop::take(&mut self.allocator) }; - allocator.reset(); + allocator.reset(self.pool.worker_flags_count); self.pool.add(allocator); } } @@ -179,7 +252,8 @@ pub const ALLOC_LAYOUT: Layout = match Layout::from_size_align(ALLOC_SIZE, ALLOC /// ALLOCATOR /// <-----------------------------------------> `Allocator` chunk (`CHUNK_SIZE` bytes) /// <----> Bumpalo's `ChunkFooter` (aligned on 16) -/// <-----------------------------------> `Allocator` chunk data storage (for AST) +/// <----> Worker thread flags (aligned on 16) +/// <-----------------------------> `Allocator` chunk data storage (for AST) /// /// METADATA /// <----> `RawTransferMetadata` @@ -197,6 +271,9 @@ pub const ALLOC_LAYOUT: Layout = match Layout::from_size_align(ALLOC_SIZE, ALLOC /// * `BLOCK_SIZE` is a multiple of 16. /// * `RawTransferMetadata` is 16 bytes. /// * Size of `FixedSizeAllocatorMetadata` is rounded up to a multiple of 16. +/// +/// TODO: This layout is a bit of a mess, and could be simplified. +/// All the metadata could be stored inside the `Allocator` chunk. 
pub struct FixedSizeAllocator { /// `Allocator` which utilizes part of the original allocation allocator: ManuallyDrop, @@ -205,7 +282,7 @@ pub struct FixedSizeAllocator { impl FixedSizeAllocator { /// Create a new [`FixedSizeAllocator`]. #[expect(clippy::items_after_statements)] - pub fn new(id: u32) -> Self { + pub fn new(id: u32, worker_flags_count: WorkerFlagsCount) -> Self { // Only support little-endian systems. `Allocator::from_raw_parts` includes this same assertion. // This module is only compiled on 64-bit little-endian systems, so it should be impossible for // this panic to occur. But we want to make absolutely sure that if there's a mistake elsewhere, @@ -269,24 +346,49 @@ impl FixedSizeAllocator { metadata_ptr.write(metadata); } - Self { allocator } + // Now that `FixedSizeAllocatorMetadata` is written, it's safe to wrap the `Allocator` + // in a `FixedSizeAllocator`. + // Do this now so the allocation gets freed in case of a panic later in this function. + let fixed_size_allocator = Self { allocator }; + let allocator = &*fixed_size_allocator.allocator; + + // Initialize worker thread flags. + // SAFETY: `data_end_ptr` is pointer to very end of the data region of the chunk. + // `worker_flags_count.get()` is guaranteed to be less than size of the chunk's data section, + // so subtracting it from `data_end_ptr` cannot go out of bounds of the chunk. + let flags_ptr = unsafe { allocator.data_end_ptr().sub(worker_flags_count.get()) }; + // SAFETY: `flags_ptr` is valid for writing `flags_layout.size()` bytes. + // `u8` has no alignment requirements. + // These bytes will be read as `bool`s. 0 is a valid value for `bool` (`false`). + unsafe { flags_ptr.write_bytes(0, worker_flags_count.get()) }; + + // Set cursor to before flags, so they're not overwritten. + // SAFETY: `flags_ptr` points to within allocator chunk, and after chunk's data pointer. 
+ unsafe { allocator.set_cursor_ptr(flags_ptr) }; + + fixed_size_allocator } /// Reset this [`FixedSizeAllocator`]. - fn reset(&mut self) { - // Set cursor back to end - self.allocator.reset(); + fn reset(&mut self, worker_flags_count: WorkerFlagsCount) { + // Set cursor back to end (but before the worker thread flags) + let data_end_ptr = self.allocator.data_end_ptr(); + // SAFETY: `data_end_ptr` is pointer to very end of the data region of the chunk. + // `worker_flags_count.get()` is guaranteed to be less than size of the chunk's data section, + // so subtracting it from `data_end_ptr` cannot go out of bounds of the chunk. + let cursor_ptr = unsafe { data_end_ptr.sub(worker_flags_count.get()) }; + // SAFETY: `cursor_ptr` points to within allocator chunk (see above) + unsafe { self.allocator.set_cursor_ptr(cursor_ptr) }; // Set data pointer back to start. // SAFETY: Fixed-size allocators have data pointer originally aligned on `BLOCK_ALIGN`, // and size less than `BLOCK_ALIGN`. So we can restore original data pointer by rounding down - // to next multiple of `BLOCK_ALIGN`. + // any pointer to within the chunk to next multiple of `BLOCK_ALIGN`. // We're restoring the original data pointer, so it cannot break invariants about alignment, // being within the chunk's allocation, or being before cursor pointer. unsafe { - let data_ptr = self.allocator.data_ptr(); - let offset = data_ptr.as_ptr() as usize % BLOCK_ALIGN; - let data_ptr = data_ptr.sub(offset); + let offset = cursor_ptr.as_ptr() as usize % BLOCK_ALIGN; + let data_ptr = cursor_ptr.sub(offset); self.allocator.set_data_ptr(data_ptr); } } diff --git a/crates/oxc_linter/src/service/runtime.rs b/crates/oxc_linter/src/service/runtime.rs index 66e97ce48ef2e..10e1756008a7d 100644 --- a/crates/oxc_linter/src/service/runtime.rs +++ b/crates/oxc_linter/src/service/runtime.rs @@ -252,6 +252,8 @@ impl Runtime { // > In the future, the default behavior may change to dynamically add or remove threads as needed. 
// https://docs.rs/rayon/1.11.0/rayon/struct.ThreadPoolBuilder.html#method.num_threads // + // That behavior change would break the assumption that number of threads is constant after + // this call, which could result in writing out of bounds of the worker threads flags array. // However, I (@overlookmotel) assume that would be considered a breaking change, // so we don't have to worry about it until Rayon v2. // When Rayon v2 is released and we upgrade to it, we'll need to revisit this and make sure diff --git a/napi/oxlint2/src/lib.rs b/napi/oxlint2/src/lib.rs index 07ee0e5fccf25..ce96092d722df 100644 --- a/napi/oxlint2/src/lib.rs +++ b/napi/oxlint2/src/lib.rs @@ -75,9 +75,13 @@ fn wrap_lint_file(cb: JsLintFileCb) -> ExternalLinterLintFileCb { let (tx, rx) = channel(); + // At present, JS plugins all run on main thread, so `thread_id` is always 0 + let thread_id = 0; + // SAFETY: This crate enables the `fixed_size` feature on `oxc_allocator`, - // so all AST `Allocator`s are created via `FixedSizeAllocator` - let (buffer_id, buffer) = unsafe { get_buffer(allocator) }; + // so all AST `Allocator`s are created via `FixedSizeAllocator`. + // `thread_id` is valid. There's always at least 1 thread, so 0 cannot be too high. + let (buffer_id, buffer) = unsafe { get_buffer(allocator, thread_id) }; // Send data to JS let status = cb.call_with_return_value( @@ -108,26 +112,29 @@ fn wrap_lint_file(cb: JsLintFileCb) -> ExternalLinterLintFileCb { }) } -/// Get buffer ID of the `Allocator` and, if it hasn't already been sent to JS, +/// Get buffer ID of the `Allocator` and, if it hasn't already been sent to this JS thread, /// create a `Uint8Array` referencing the `Allocator`'s memory. /// -/// Each buffer is sent over to JS only once. +/// Each buffer is sent over to each JS thread only once. /// JS side stores them in an array (indexed by buffer ID), and holds them until process ends. 
-/// This means there's only ever 1 instance of a buffer on Rust side, and 1 on JS side, +/// This means there's only ever 1 instance of a buffer on Rust side, and 1 on each JS thread, /// which makes it simpler to avoid use-after-free or double-free problems. /// -/// So only create a `Uint8Array` if it's not already sent to JS. +/// So only create a `Uint8Array` if it's not already sent to this JS thread. /// -/// Whether the buffer has already been sent to JS is tracked by a reference counter -/// in `FixedSizeAllocatorMetadata`, which is stored in memory backing the `Allocator`. +/// Whether the buffer has already been sent to this JS thread is tracked by a series of `bool` flags +/// stored in the `Allocator`'s memory, just before the `ChunkFooter`. +/// There's a `bool` for each thread in the Rayon global thread pool. /// /// # SAFETY -/// `allocator` must have been created via `FixedSizeAllocator` +/// * `allocator` must have been created via `FixedSizeAllocator`. +/// * `thread_id` must be less than number of threads in Rayon global thread pool. unsafe fn get_buffer( allocator: &Allocator, + thread_id: usize, ) -> ( u32, // Buffer ID - Option, // Buffer, if not already sent to JS + Option, // Buffer, if not already sent to this JS thread ) { // SAFETY: Caller guarantees `Allocator` was created by a `FixedSizeAllocator`. // We only create an immutable ref from this pointer. @@ -138,18 +145,38 @@ unsafe fn get_buffer( let buffer_id = metadata.id; - // Get whether this buffer has already been sent to JS - // TODO: Is `SeqCst` excessive here? - let old_ref_count = metadata.ref_count.swap(2, Ordering::SeqCst); - let already_sent_to_js = old_ref_count > 1; - - // If buffer has already been sent to JS, don't send it again - if already_sent_to_js { + // Get whether this buffer has already been sent to this JS thread. + // + // This is tracked by a series of `bool` flags stored in the `Allocator`'s memory, + // just before the `ChunkFooter`. 
+ // `FixedSizeAllocator` initialized N x `bool` flags, where N is the number of threads in Rayon's + // global thread pool. + // These flags reside in the slice of memory ranging from `data_end_ptr() - N` to `data_end_ptr() - 1`. + // + // We don't know how many threads there are here, so work backwards from the end. + // * Flag for thread 0 is at `data_end_ptr() - 1` + // * Flag for thread 1 is at `data_end_ptr() - 2`, etc. + // + // SAFETY: Caller guarantees `thread_id` is less than number of threads in Rayon global thread pool. + // Therefore `data_end_ptr() - (thread_id + 1)` points to the flag for this thread, + // and it must be a valid initialized `bool`. + let sent_to_js_thread = + unsafe { allocator.data_end_ptr().cast::().sub(thread_id + 1).as_mut() }; + + // If buffer has already been sent to this JS thread, don't send it again + if *sent_to_js_thread { return (buffer_id, None); } // Buffer has not already been sent to JS. Send it. + // Record that this buffer has now been sent to this JS thread + *sent_to_js_thread = true; + + // Increment reference count for this allocator + // TODO: Is `SeqCst` excessive here? + metadata.ref_count.fetch_add(1, Ordering::SeqCst); + // Get pointer to start of allocator chunk. // Note: `Allocator::data_ptr` would not provide the right pointer, because source text // gets written to start of the allocator chunk, and `data_ptr` gets moved to after it.