Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 115 additions & 13 deletions crates/oxc_allocator/src/pool_fixed_size.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use std::{
alloc::{self, GlobalAlloc, Layout, System},
mem::ManuallyDrop,
num::NonZeroUsize,
ops::Deref,
ptr::NonNull,
sync::{
Expand All @@ -19,23 +20,95 @@ use crate::{
const TWO_GIB: usize = 1 << 31;
const FOUR_GIB: usize = 1 << 32;

/// Maximum number of threads supported.
///
/// This is the same value that Rayon uses.
/// <https://github.com/rayon-rs/rayon/blob/7af20d7692d5decbcb4adcaa079cd607e3e50814/rayon-core/src/sleep/counters.rs#L55-L77>
///
/// We don't use `rayon::max_num_threads()` to get this value because Rayon's docs state that
/// "value may vary between different targets, and is subject to change in new Rayon versions".
/// <https://docs.rs/rayon/latest/rayon/fn.max_num_threads.html>
///
/// We need an absolute guarantee that maximum `WorkerFlagsCount` is less than `TWO_GIB` minus size of
/// all metadata, so worker thread flags definitely fit in the allocator chunk. So we hard-code it.
///
/// If Rayon decreases its max thread count in future, it's fine - thread count will never reach our limit.
/// If Rayon increases its max thread count, that's also not too problematic - we just impose a lower limit
/// than Rayon, and panic if that limit is exceeded. We can raise the limit if needed.
///
/// This is all academic anyway. It seems unlikely any machine running Oxc will have anywhere near
/// 65535 CPU cores!
const MAX_THREADS: usize = (1 << 16) - 1; // 65535

mod worker_flags_count {
    use super::*;

    /// Number of bytes for worker thread flags in a [`FixedSizeAllocator`].
    ///
    /// Wrapper around `NonZeroUsize` which limits value to between 16 and `MAX_THREADS + 1`,
    /// and a multiple of 16.
    ///
    /// The private field means a `WorkerFlagsCount` can only be obtained via [`WorkerFlagsCount::new`],
    /// so those invariants always hold.
    #[derive(Clone, Copy)]
    #[repr(transparent)]
    pub struct WorkerFlagsCount {
        count: NonZeroUsize,
    }

    impl WorkerFlagsCount {
        /// Create a new [`WorkerFlagsCount`] for the specified number of threads.
        ///
        /// # Panics
        /// Panics if `thread_count` is 0 or exceeds `MAX_THREADS`.
        #[inline]
        pub fn new(thread_count: usize) -> Self {
            assert!(thread_count > 0, "Thread count cannot be 0");
            assert!(thread_count <= MAX_THREADS, "Thread count cannot exceed {MAX_THREADS}");

            // Worker flags are stored at end of allocator chunks, just before `ChunkFooter`.
            // Using a multiple of 16 may make it a little faster to initialize the flags bytes.
            let count = thread_count.next_multiple_of(16);
            // `count` cannot be 0: `thread_count > 0` was asserted above, and `next_multiple_of(16)`
            // cannot wrap because `thread_count <= MAX_THREADS` (65535), so `count <= 65536`.
            // The safe constructor is zero-cost here - the compiler can prove the check passes -
            // and avoids an `unsafe` block on a cold path.
            let count = NonZeroUsize::new(count)
                .expect("`count` is non-zero: `thread_count > 0` and no overflow possible");
            WorkerFlagsCount { count }
        }

        /// Get the worker flags count.
        #[inline]
        pub fn get(self) -> usize {
            self.count.get()
        }
    }
}
use worker_flags_count::WorkerFlagsCount;

/// A thread-safe pool for reusing [`Allocator`] instances to reduce allocation overhead.
///
/// Internally uses a `Vec` protected by a `Mutex` to store available allocators.
/// Allocators are created lazily on demand (see `AllocatorPool::new`), since each one
/// reserves a large block of memory.
pub struct AllocatorPool {
/// Allocators in the pool, available to be handed out.
/// `Mutex<Vec<_>>` is sufficient here - the lock is only held briefly to push/pop an allocator.
allocators: Mutex<Vec<FixedSizeAllocator>>,
/// Number of bytes reserved for flags indicating which JS worker threads have been sent an allocator.
/// Fixed at pool creation from the thread count; shared by all allocators this pool creates.
worker_flags_count: WorkerFlagsCount,
/// ID to assign to next `Allocator` that's created.
/// Monotonically increasing; `AtomicU32` because allocators may be created from multiple threads.
next_id: AtomicU32,
}

impl AllocatorPool {
/// Creates a new [`AllocatorPool`] for use across the specified number of threads.
///
/// The pool starts empty; allocators are created on demand as they're requested.
///
/// # Panics
/// Panics if `thread_count` is 0 or exceeds `MAX_THREADS`.
pub fn new(thread_count: usize) -> AllocatorPool {
    // Validates `thread_count` (panics if 0 or too large) and rounds up to a multiple of 16
    let worker_flags_count = WorkerFlagsCount::new(thread_count);

    // Each allocator consumes a large block of memory, so create them on demand instead of upfront,
    // in case not all threads end up being used (e.g. language server without `import` plugin)
    let allocators = Vec::with_capacity(thread_count);

    // NOTE: A stale duplicate construction of `AllocatorPool` (missing the `worker_flags_count`
    // field - a leftover from before that field existed) has been removed here.
    AllocatorPool {
        allocators: Mutex::new(allocators),
        worker_flags_count,
        next_id: AtomicU32::new(0),
    }
}

/// Retrieves an [`Allocator`] from the pool, or creates a new one if the pool is empty.
Expand All @@ -58,7 +131,7 @@ impl AllocatorPool {
// Protect against IDs wrapping around.
// TODO: Does this work? Do we need it anyway?
assert!(id < u32::MAX, "Created too many allocators");
FixedSizeAllocator::new(id)
FixedSizeAllocator::new(id, self.worker_flags_count)
});

AllocatorGuard { allocator: ManuallyDrop::new(allocator), pool: self }
Expand Down Expand Up @@ -98,7 +171,7 @@ impl Drop for AllocatorGuard<'_> {
fn drop(&mut self) {
// SAFETY: After taking ownership of the `FixedSizeAllocator`, we do not touch the `ManuallyDrop` again
let mut allocator = unsafe { ManuallyDrop::take(&mut self.allocator) };
allocator.reset();
allocator.reset(self.pool.worker_flags_count);
self.pool.add(allocator);
}
}
Expand Down Expand Up @@ -179,7 +252,8 @@ pub const ALLOC_LAYOUT: Layout = match Layout::from_size_align(ALLOC_SIZE, ALLOC
/// ALLOCATOR
/// <-----------------------------------------> `Allocator` chunk (`CHUNK_SIZE` bytes)
/// <----> Bumpalo's `ChunkFooter` (aligned on 16)
/// <-----------------------------------> `Allocator` chunk data storage (for AST)
/// <----> Worker thread flags (aligned on 16)
/// <-----------------------------> `Allocator` chunk data storage (for AST)
///
/// METADATA
/// <----> `RawTransferMetadata`
Expand All @@ -197,6 +271,9 @@ pub const ALLOC_LAYOUT: Layout = match Layout::from_size_align(ALLOC_SIZE, ALLOC
/// * `BLOCK_SIZE` is a multiple of 16.
/// * `RawTransferMetadata` is 16 bytes.
/// * Size of `FixedSizeAllocatorMetadata` is rounded up to a multiple of 16.
///
/// TODO: This layout is a bit of a mess, and could be simplified.
/// All the metadata could be stored inside the `Allocator` chunk.
pub struct FixedSizeAllocator {
/// `Allocator` which utilizes part of the original allocation
allocator: ManuallyDrop<Allocator>,
Expand All @@ -205,7 +282,7 @@ pub struct FixedSizeAllocator {
impl FixedSizeAllocator {
/// Create a new [`FixedSizeAllocator`].
#[expect(clippy::items_after_statements)]
pub fn new(id: u32) -> Self {
pub fn new(id: u32, worker_flags_count: WorkerFlagsCount) -> Self {
// Only support little-endian systems. `Allocator::from_raw_parts` includes this same assertion.
// This module is only compiled on 64-bit little-endian systems, so it should be impossible for
// this panic to occur. But we want to make absolutely sure that if there's a mistake elsewhere,
Expand Down Expand Up @@ -269,24 +346,49 @@ impl FixedSizeAllocator {
metadata_ptr.write(metadata);
}

Self { allocator }
// Now that `FixedSizeAllocatorMetadata` is written, it's safe to wrap the `Allocator`
// in a `FixedSizeAllocator`.
// Do this now so the allocation gets freed in case of a panic later in this function.
let fixed_size_allocator = Self { allocator };
let allocator = &*fixed_size_allocator.allocator;

// Initialize worker thread flags.
// SAFETY: `data_end_ptr` is pointer to very end of the data region of the chunk.
// `worker_flags_count.get()` is guaranteed to be less than size of the chunk's data section,
// so subtracting it from `data_end_ptr` cannot go out of bounds of the chunk.
let flags_ptr = unsafe { allocator.data_end_ptr().sub(worker_flags_count.get()) };
// SAFETY: `flags_ptr` is valid for writing `flags_layout.size()` bytes.
// `u8` has no alignment requirements.
// These bytes will be read as `bool`s. 0 is a valid value for `bool` (`false`).
unsafe { flags_ptr.write_bytes(0, worker_flags_count.get()) };

// Set cursor to before flags, so they're not overwritten.
// SAFETY: `flags_ptr` points to within allocator chunk, and after chunk's data pointer.
unsafe { allocator.set_cursor_ptr(flags_ptr) };

fixed_size_allocator
}

/// Reset this [`FixedSizeAllocator`].
fn reset(&mut self) {
// Set cursor back to end
self.allocator.reset();
fn reset(&mut self, worker_flags_count: WorkerFlagsCount) {
// Set cursor back to end (but before the worker thread flags)
let data_end_ptr = self.allocator.data_end_ptr();
// SAFETY: `data_end_ptr` is pointer to very end of the data region of the chunk.
// `worker_flags_count.get()` is guaranteed to be less than size of the chunk's data section,
// so subtracting it from `data_end_ptr` cannot go out of bounds of the chunk.
let cursor_ptr = unsafe { data_end_ptr.sub(worker_flags_count.get()) };
// SAFETY: `cursor_ptr` points to within allocator chunk (see above)
unsafe { self.allocator.set_cursor_ptr(cursor_ptr) };

// Set data pointer back to start.
// SAFETY: Fixed-size allocators have data pointer originally aligned on `BLOCK_ALIGN`,
// and size less than `BLOCK_ALIGN`. So we can restore original data pointer by rounding down
// to next multiple of `BLOCK_ALIGN`.
// any pointer to within the chunk to next multiple of `BLOCK_ALIGN`.
// We're restoring the original data pointer, so it cannot break invariants about alignment,
// being within the chunk's allocation, or being before cursor pointer.
unsafe {
let data_ptr = self.allocator.data_ptr();
let offset = data_ptr.as_ptr() as usize % BLOCK_ALIGN;
let data_ptr = data_ptr.sub(offset);
let offset = cursor_ptr.as_ptr() as usize % BLOCK_ALIGN;
let data_ptr = cursor_ptr.sub(offset);
self.allocator.set_data_ptr(data_ptr);
}
}
Expand Down
2 changes: 2 additions & 0 deletions crates/oxc_linter/src/service/runtime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,8 @@ impl Runtime {
// > In the future, the default behavior may change to dynamically add or remove threads as needed.
// https://docs.rs/rayon/1.11.0/rayon/struct.ThreadPoolBuilder.html#method.num_threads
//
// That behavior change would break the assumption that number of threads is constant after
// this call, which could result in writing out of bounds of the worker threads flags array.
// However, I (@overlookmotel) assume that would be considered a breaking change,
// so we don't have to worry about it until Rayon v2.
// When Rayon v2 is released and we upgrade to it, we'll need to revisit this and make sure
Expand Down
61 changes: 44 additions & 17 deletions napi/oxlint2/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,13 @@ fn wrap_lint_file(cb: JsLintFileCb) -> ExternalLinterLintFileCb {

let (tx, rx) = channel();

// At present, JS plugins all run on main thread, so `thread_id` is always 0
let thread_id = 0;

// SAFETY: This crate enables the `fixed_size` feature on `oxc_allocator`,
// so all AST `Allocator`s are created via `FixedSizeAllocator`
let (buffer_id, buffer) = unsafe { get_buffer(allocator) };
// so all AST `Allocator`s are created via `FixedSizeAllocator`.
// `thread_id` is valid. There's always at least 1 thread, so 0 cannot be too high.
let (buffer_id, buffer) = unsafe { get_buffer(allocator, thread_id) };

// Send data to JS
let status = cb.call_with_return_value(
Expand Down Expand Up @@ -108,26 +112,29 @@ fn wrap_lint_file(cb: JsLintFileCb) -> ExternalLinterLintFileCb {
})
}

/// Get buffer ID of the `Allocator` and, if it hasn't already been sent to JS,
/// Get buffer ID of the `Allocator` and, if it hasn't already been sent to this JS thread,
/// create a `Uint8Array` referencing the `Allocator`'s memory.
///
/// Each buffer is sent over to JS only once.
/// Each buffer is sent over to each JS thread only once.
/// JS side stores them in an array (indexed by buffer ID), and holds them until process ends.
/// This means there's only ever 1 instance of a buffer on Rust side, and 1 on JS side,
/// This means there's only ever 1 instance of a buffer on Rust side, and 1 on each JS thread,
/// which makes it simpler to avoid use-after-free or double-free problems.
///
/// So only create a `Uint8Array` if it's not already sent to JS.
/// So only create a `Uint8Array` if it's not already sent to this JS thread.
///
/// Whether the buffer has already been sent to JS is tracked by a reference counter
/// in `FixedSizeAllocatorMetadata`, which is stored in memory backing the `Allocator`.
/// Whether the buffer has already been sent to this JS thread is tracked by a series of `bool` flags
/// stored in the `Allocator`'s memory, just before the `ChunkFooter`.
/// There's a `bool` for each thread in the Rayon global thread pool.
///
/// # SAFETY
/// `allocator` must have been created via `FixedSizeAllocator`
/// * `allocator` must have been created via `FixedSizeAllocator`.
/// * `thread_id` must be less than number of threads in Rayon global thread pool.
unsafe fn get_buffer(
allocator: &Allocator,
thread_id: usize,
) -> (
u32, // Buffer ID
Option<Uint8Array>, // Buffer, if not already sent to JS
Option<Uint8Array>, // Buffer, if not already sent to this JS thread
) {
// SAFETY: Caller guarantees `Allocator` was created by a `FixedSizeAllocator`.
// We only create an immutable ref from this pointer.
Expand All @@ -138,18 +145,38 @@ unsafe fn get_buffer(

let buffer_id = metadata.id;

// Get whether this buffer has already been sent to JS
// TODO: Is `SeqCst` excessive here?
let old_ref_count = metadata.ref_count.swap(2, Ordering::SeqCst);
let already_sent_to_js = old_ref_count > 1;

// If buffer has already been sent to JS, don't send it again
if already_sent_to_js {
// Get whether this buffer has already been sent to this JS thread.
//
// This is tracked by a series of `bool` flags stored in the `Allocator`'s memory,
// just before the `ChunkFooter`.
// `FixedSizeAllocator` initialized N x `bool` flags, where N is the number of threads in Rayon's
// global thread pool.
// These flags reside in the slice of memory ranging from `data_end_ptr() - N` to `data_end_ptr() - 1`.
//
// We don't know how many threads there are here, so work backwards from the end.
// * Flag for thread 0 is at `data_end_ptr() - 1`
// * Flag for thread 1 is at `data_end_ptr() - 2`, etc.
//
// SAFETY: Caller guarantees `thread_id` is less than number of threads in Rayon global thread pool.
// Therefore `data_end_ptr() - (thread_id + 1)` points to the flag for this thread,
// and it must be a valid initialized `bool`.
let sent_to_js_thread =
unsafe { allocator.data_end_ptr().cast::<bool>().sub(thread_id + 1).as_mut() };

// If buffer has already been sent to this JS thread, don't send it again
if *sent_to_js_thread {
return (buffer_id, None);
}

// Buffer has not already been sent to JS. Send it.

// Record that this buffer has now been sent to this JS thread
*sent_to_js_thread = true;

// Increment reference count for this allocator
// TODO: Is `SeqCst` excessive here?
metadata.ref_count.fetch_add(1, Ordering::SeqCst);

// Get pointer to start of allocator chunk.
// Note: `Allocator::data_ptr` would not provide the right pointer, because source text
// gets written to start of the allocator chunk, and `data_ptr` gets moved to after it.
Expand Down
Loading