Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 115 additions & 13 deletions crates/oxc_allocator/src/pool_fixed_size.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use std::{
alloc::{self, GlobalAlloc, Layout, System},
mem::ManuallyDrop,
num::NonZeroUsize,
ops::Deref,
ptr::NonNull,
sync::{
Expand All @@ -19,23 +20,95 @@ use crate::{
const TWO_GIB: usize = 1 << 31;
const FOUR_GIB: usize = 1 << 32;

/// Maximum number of threads supported.
///
/// This is the same value that Rayon uses.
/// <https://github.com/rayon-rs/rayon/blob/7af20d7692d5decbcb4adcaa079cd607e3e50814/rayon-core/src/sleep/counters.rs#L55-L77>
///
/// We don't use `rayon::max_num_threads()` to get this value because Rayon's docs state that
/// "value may vary between different targets, and is subject to change in new Rayon versions".
/// <https://docs.rs/rayon/latest/rayon/fn.max_num_threads.html>
///
/// We need an absolute guarantee that maximum `WorkerFlagsCount` is less than `TWO_GIB` minus size of
/// all metadata, so worker thread flags definitely fit in the allocator chunk. So we hard-code it.
///
/// If Rayon decreases its max thread count in future, it's fine - thread count will never reach our limit.
/// If Rayon increases its max thread count, that's also not too problematic - we just impose a lower limit
/// than Rayon, and panic if that limit is exceeded. We can raise the limit if needed.
///
/// This is all academic anyway. It seems unlikely any machine running Oxc will have anywhere near
/// 65535 CPU cores!
const MAX_THREADS: usize = (1 << 16) - 1; // 65535

mod worker_flags_count {
    use super::*;

    /// Number of bytes for worker thread flags in a [`FixedSizeAllocator`].
    ///
    /// Wrapper around `NonZeroUsize` which limits value to between 16 and `MAX_THREADS + 1`,
    /// and a multiple of 16.
    ///
    /// The private field means a `WorkerFlagsCount` can only be obtained via [`WorkerFlagsCount::new`],
    /// so those invariants always hold.
    #[derive(Clone, Copy)]
    #[repr(transparent)]
    pub struct WorkerFlagsCount {
        count: NonZeroUsize,
    }

    impl WorkerFlagsCount {
        /// Create a new [`WorkerFlagsCount`] for the specified number of threads.
        ///
        /// # Panics
        /// Panics if `thread_count` is 0 or exceeds `MAX_THREADS`.
        #[inline]
        pub fn new(thread_count: usize) -> Self {
            assert!(thread_count > 0, "Thread count cannot be 0");
            assert!(thread_count <= MAX_THREADS, "Thread count cannot exceed {MAX_THREADS}");

            // Worker flags are stored at end of allocator chunks, just before `ChunkFooter`.
            // Using a multiple of 16 may make it a little faster to initialize the flags bytes.
            let count = thread_count.next_multiple_of(16);
            // `count` cannot be 0: `thread_count > 0` was asserted above, and `next_multiple_of(16)`
            // cannot wrap because `thread_count <= MAX_THREADS` (65535), so `count <= 65536`.
            // The safe constructor is zero-cost here - the compiler can prove the check passes -
            // and avoids an `unsafe` block on a cold path.
            let count = NonZeroUsize::new(count)
                .expect("`count` is non-zero: `thread_count > 0` and no overflow possible");
            WorkerFlagsCount { count }
        }

        /// Get the worker flags count.
        #[inline]
        pub fn get(self) -> usize {
            self.count.get()
        }
    }
}
use worker_flags_count::WorkerFlagsCount;

/// A thread-safe pool for reusing [`Allocator`] instances to reduce allocation overhead.
///
/// Internally uses a `Vec` protected by a `Mutex` to store available allocators.
/// Allocators are created lazily on demand (see `AllocatorPool::new`), since each one
/// reserves a large block of memory.
pub struct AllocatorPool {
/// Allocators in the pool, available to be handed out.
/// `Mutex<Vec<_>>` is sufficient here - the lock is only held briefly to push/pop an allocator.
allocators: Mutex<Vec<FixedSizeAllocator>>,
/// Number of bytes reserved for flags indicating which JS worker threads have been sent an allocator.
/// Fixed at pool creation from the thread count; shared by all allocators this pool creates.
worker_flags_count: WorkerFlagsCount,
/// ID to assign to next `Allocator` that's created.
/// Monotonically increasing; `AtomicU32` because allocators may be created from multiple threads.
next_id: AtomicU32,
}

impl AllocatorPool {
/// Creates a new [`AllocatorPool`] for use across the specified number of threads.
///
/// The pool starts empty; allocators are created on demand as they're requested.
///
/// # Panics
/// Panics if `thread_count` is 0 or exceeds `MAX_THREADS`.
pub fn new(thread_count: usize) -> AllocatorPool {
    // Validates `thread_count` (panics if 0 or too large) and rounds up to a multiple of 16
    let worker_flags_count = WorkerFlagsCount::new(thread_count);

    // Each allocator consumes a large block of memory, so create them on demand instead of upfront,
    // in case not all threads end up being used (e.g. language server without `import` plugin)
    let allocators = Vec::with_capacity(thread_count);

    // NOTE: A stale duplicate construction of `AllocatorPool` (missing the `worker_flags_count`
    // field - a leftover from before that field existed) has been removed here.
    AllocatorPool {
        allocators: Mutex::new(allocators),
        worker_flags_count,
        next_id: AtomicU32::new(0),
    }
}

/// Retrieves an [`Allocator`] from the pool, or creates a new one if the pool is empty.
Expand All @@ -58,7 +131,7 @@ impl AllocatorPool {
// Protect against IDs wrapping around.
// TODO: Does this work? Do we need it anyway?
assert!(id < u32::MAX, "Created too many allocators");
FixedSizeAllocator::new(id)
FixedSizeAllocator::new(id, self.worker_flags_count)
});

AllocatorGuard { allocator: ManuallyDrop::new(allocator), pool: self }
Expand Down Expand Up @@ -98,7 +171,7 @@ impl Drop for AllocatorGuard<'_> {
fn drop(&mut self) {
// SAFETY: After taking ownership of the `FixedSizeAllocator`, we do not touch the `ManuallyDrop` again
let mut allocator = unsafe { ManuallyDrop::take(&mut self.allocator) };
allocator.reset();
allocator.reset(self.pool.worker_flags_count);
self.pool.add(allocator);
}
}
Expand Down Expand Up @@ -179,7 +252,8 @@ pub const ALLOC_LAYOUT: Layout = match Layout::from_size_align(ALLOC_SIZE, ALLOC
/// ALLOCATOR
/// <-----------------------------------------> `Allocator` chunk (`CHUNK_SIZE` bytes)
/// <----> Bumpalo's `ChunkFooter` (aligned on 16)
/// <-----------------------------------> `Allocator` chunk data storage (for AST)
/// <----> Worker thread flags (aligned on 16)
/// <-----------------------------> `Allocator` chunk data storage (for AST)
///
/// METADATA
/// <----> `RawTransferMetadata`
Expand All @@ -197,6 +271,9 @@ pub const ALLOC_LAYOUT: Layout = match Layout::from_size_align(ALLOC_SIZE, ALLOC
/// * `BLOCK_SIZE` is a multiple of 16.
/// * `RawTransferMetadata` is 16 bytes.
/// * Size of `FixedSizeAllocatorMetadata` is rounded up to a multiple of 16.
///
/// TODO: This layout is a bit of a mess, and could be simplified.
/// All the metadata could be stored inside the `Allocator` chunk.
pub struct FixedSizeAllocator {
/// `Allocator` which utilizes part of the original allocation
allocator: ManuallyDrop<Allocator>,
Expand All @@ -205,7 +282,7 @@ pub struct FixedSizeAllocator {
impl FixedSizeAllocator {
/// Create a new [`FixedSizeAllocator`].
#[expect(clippy::items_after_statements)]
pub fn new(id: u32) -> Self {
pub fn new(id: u32, worker_flags_count: WorkerFlagsCount) -> Self {
// Only support little-endian systems. `Allocator::from_raw_parts` includes this same assertion.
// This module is only compiled on 64-bit little-endian systems, so it should be impossible for
// this panic to occur. But we want to make absolutely sure that if there's a mistake elsewhere,
Expand Down Expand Up @@ -269,24 +346,49 @@ impl FixedSizeAllocator {
metadata_ptr.write(metadata);
}

Self { allocator }
// Now that `FixedSizeAllocatorMetadata` is written, it's safe to wrap the `Allocator`
// in a `FixedSizeAllocator`.
// Do this now so the allocation gets freed in case of a panic later in this function.
let fixed_size_allocator = Self { allocator };
let allocator = &*fixed_size_allocator.allocator;

// Initialize worker thread flags.
// SAFETY: `data_end_ptr` is pointer to very end of the data region of the chunk.
// `worker_flags_count.get()` is guaranteed to be less than size of the chunk's data section,
// so subtracting it from `data_end_ptr` cannot go out of bounds of the chunk.
let flags_ptr = unsafe { allocator.data_end_ptr().sub(worker_flags_count.get()) };
// SAFETY: `flags_ptr` is valid for writing `flags_layout.size()` bytes.
// `u8` has no alignment requirements.
// These bytes will be read as `bool`s. 0 is a valid value for `bool` (`false`).
unsafe { flags_ptr.write_bytes(0, worker_flags_count.get()) };

// Set cursor to before flags, so they're not overwritten.
// SAFETY: `flags_ptr` points to within allocator chunk, and after chunk's data pointer.
unsafe { allocator.set_cursor_ptr(flags_ptr) };

fixed_size_allocator
}

/// Reset this [`FixedSizeAllocator`].
fn reset(&mut self) {
// Set cursor back to end
self.allocator.reset();
fn reset(&mut self, worker_flags_count: WorkerFlagsCount) {
// Set cursor back to end (but before the worker thread flags)
let data_end_ptr = self.allocator.data_end_ptr();
// SAFETY: `data_end_ptr` is pointer to very end of the data region of the chunk.
// `worker_flags_count.get()` is guaranteed to be less than size of the chunk's data section,
// so subtracting it from `data_end_ptr` cannot go out of bounds of the chunk.
let cursor_ptr = unsafe { data_end_ptr.sub(worker_flags_count.get()) };
// SAFETY: `cursor_ptr` points to within allocator chunk (see above)
unsafe { self.allocator.set_cursor_ptr(cursor_ptr) };

// Set data pointer back to start.
// SAFETY: Fixed-size allocators have data pointer originally aligned on `BLOCK_ALIGN`,
// and size less than `BLOCK_ALIGN`. So we can restore original data pointer by rounding down
// to next multiple of `BLOCK_ALIGN`.
// any pointer to within the chunk to next multiple of `BLOCK_ALIGN`.
// We're restoring the original data pointer, so it cannot break invariants about alignment,
// being within the chunk's allocation, or being before cursor pointer.
unsafe {
let data_ptr = self.allocator.data_ptr();
let offset = data_ptr.as_ptr() as usize % BLOCK_ALIGN;
let data_ptr = data_ptr.sub(offset);
let offset = cursor_ptr.as_ptr() as usize % BLOCK_ALIGN;
let data_ptr = cursor_ptr.sub(offset);
self.allocator.set_data_ptr(data_ptr);
}
}
Expand Down
2 changes: 2 additions & 0 deletions crates/oxc_linter/src/service/runtime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,8 @@ impl Runtime {
// > In the future, the default behavior may change to dynamically add or remove threads as needed.
// https://docs.rs/rayon/1.11.0/rayon/struct.ThreadPoolBuilder.html#method.num_threads
//
// That behavior change would break the assumption that number of threads is constant after
// this call, which could result in writing out of bounds of the worker threads flags array.
// However, I (@overlookmotel) assume that would be considered a breaking change,
// so we don't have to worry about it until Rayon v2.
// When Rayon v2 is released and we upgrade to it, we'll need to revisit this and make sure
Expand Down
61 changes: 44 additions & 17 deletions napi/oxlint2/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,13 @@ fn wrap_lint_file(cb: JsLintFileCb) -> ExternalLinterLintFileCb {

let (tx, rx) = channel();

// At present, JS plugins all run on main thread, so `thread_id` is always 0
let thread_id = 0;

// SAFETY: This crate enables the `fixed_size` feature on `oxc_allocator`,
// so all AST `Allocator`s are created via `FixedSizeAllocator`
let (buffer_id, buffer) = unsafe { get_buffer(allocator) };
// so all AST `Allocator`s are created via `FixedSizeAllocator`.
// `thread_id` is valid. There's always at least 1 thread, so 0 cannot be too high.
let (buffer_id, buffer) = unsafe { get_buffer(allocator, thread_id) };

// Send data to JS
let status = cb.call_with_return_value(
Expand Down Expand Up @@ -108,26 +112,29 @@ fn wrap_lint_file(cb: JsLintFileCb) -> ExternalLinterLintFileCb {
})
}

/// Get buffer ID of the `Allocator` and, if it hasn't already been sent to JS,
/// Get buffer ID of the `Allocator` and, if it hasn't already been sent to this JS thread,
/// create a `Uint8Array` referencing the `Allocator`'s memory.
///
/// Each buffer is sent over to JS only once.
/// Each buffer is sent over to each JS thread only once.
/// JS side stores them in an array (indexed by buffer ID), and holds them until process ends.
/// This means there's only ever 1 instance of a buffer on Rust side, and 1 on JS side,
/// This means there's only ever 1 instance of a buffer on Rust side, and 1 on each JS thread,
/// which makes it simpler to avoid use-after-free or double-free problems.
///
/// So only create a `Uint8Array` if it's not already sent to JS.
/// So only create a `Uint8Array` if it's not already sent to this JS thread.
///
/// Whether the buffer has already been sent to JS is tracked by a reference counter
/// in `FixedSizeAllocatorMetadata`, which is stored in memory backing the `Allocator`.
/// Whether the buffer has already been sent to this JS thread is tracked by a series of `bool` flags
/// stored in the `Allocator`'s memory, just before the `ChunkFooter`.
/// There's a `bool` for each thread in the Rayon global thread pool.
///
/// # SAFETY
/// `allocator` must have been created via `FixedSizeAllocator`
/// * `allocator` must have been created via `FixedSizeAllocator`.
/// * `thread_id` must be less than number of threads in Rayon global thread pool.
unsafe fn get_buffer(
allocator: &Allocator,
thread_id: usize,
) -> (
u32, // Buffer ID
Option<Uint8Array>, // Buffer, if not already sent to JS
Option<Uint8Array>, // Buffer, if not already sent to this JS thread
) {
// SAFETY: Caller guarantees `Allocator` was created by a `FixedSizeAllocator`.
// We only create an immutable ref from this pointer.
Expand All @@ -138,18 +145,38 @@ unsafe fn get_buffer(

let buffer_id = metadata.id;

// Get whether this buffer has already been sent to JS
// TODO: Is `SeqCst` excessive here?
let old_ref_count = metadata.ref_count.swap(2, Ordering::SeqCst);
let already_sent_to_js = old_ref_count > 1;

// If buffer has already been sent to JS, don't send it again
if already_sent_to_js {
// Get whether this buffer has already been sent to this JS thread.
//
// This is tracked by a series of `bool` flags stored in the `Allocator`'s memory,
// just before the `ChunkFooter`.
// `FixedSizeAllocator` initialized N x `bool` flags, where N is the number of threads in Rayon's
// global thread pool.
// These flags reside in the slice of memory ranging from `data_end_ptr() - N` to `data_end_ptr() - 1`.
//
// We don't know how many threads there are here, so work backwards from the end.
// * Flag for thread 0 is at `data_end_ptr() - 1`
// * Flag for thread 1 is at `data_end_ptr() - 2`, etc.
//
// SAFETY: Caller guarantees `thread_id` is less than number of threads in Rayon global thread pool.
// Therefore `data_end_ptr() - (thread_id + 1)` points to the flag for this thread,
// and it must be a valid initialized `bool`.
let sent_to_js_thread =
unsafe { allocator.data_end_ptr().cast::<bool>().sub(thread_id + 1).as_mut() };

// If buffer has already been sent to this JS thread, don't send it again
if *sent_to_js_thread {
return (buffer_id, None);
}

// Buffer has not already been sent to JS. Send it.

// Record that this buffer has now been sent to this JS thread
*sent_to_js_thread = true;

// Increment reference count for this allocator
// TODO: Is `SeqCst` excessive here?
metadata.ref_count.fetch_add(1, Ordering::SeqCst);

// Get pointer to start of allocator chunk.
// Note: `Allocator::data_ptr` would not provide the right pointer, because source text
// gets written to start of the allocator chunk, and `data_ptr` gets moved to after it.
Expand Down
Loading