diff --git a/apps/oxlint/src/lint.rs b/apps/oxlint/src/lint.rs index 31fb353f1e7e6..5d040f1508668 100644 --- a/apps/oxlint/src/lint.rs +++ b/apps/oxlint/src/lint.rs @@ -352,21 +352,6 @@ impl CliRunner { .with_report_unused_directives(report_unused_directives); let number_of_files = files_to_lint.len(); - - // Due to the architecture of the import plugin and JS plugins, - // linting a large number of files with both enabled can cause resource exhaustion. - // See: https://github.com/oxc-project/oxc/issues/15863 - if number_of_files > 10_000 && use_cross_module && has_external_linter { - print_and_flush_stdout( - stdout, - &format!( - "Failed to run oxlint.\n{}\n", - render_report(&handler, &OxcDiagnostic::error(format!("Linting {number_of_files} files with both import plugin and JS plugins enabled can cause resource exhaustion.")).with_help("See https://github.com/oxc-project/oxc/issues/15863 for more details.")) - ), - ); - return CliRunResult::TooManyFilesWithImportAndJsPlugins; - } - let tsconfig = basic_options.tsconfig; if let Some(path) = tsconfig.as_ref() { if path.is_file() { @@ -404,10 +389,16 @@ impl CliRunner { } }; - // Configure the file system for external linter if needed + // Configure the file system for external linter if needed. + // When using the copy-to-fixed-allocator approach (cross-module + JS plugins), + // we use `OsFileSystem` instead of `RawTransferFileSystem`, because we use standard allocators for parsing. let file_system = if has_external_linter { #[cfg(all(feature = "napi", target_pointer_width = "64", target_endian = "little"))] - { + if use_cross_module { + // Use standard file system - source text will be copied to fixed-size allocator later + None + } else { + // Use raw transfer file system - source text goes directly to fixed-size allocator Some( &crate::js_plugins::RawTransferFileSystem as &(dyn oxc_linter::RuntimeFileSystem + Sync + Send), diff --git a/apps/oxlint/src/result.rs b/apps/oxlint/src/result.rs index 84b6f08f15e46..7c658f633defb 100644 --- a/apps/oxlint/src/result.rs +++ b/apps/oxlint/src/result.rs @@ -17,7 +17,6 @@ pub enum CliRunResult { ConfigFileInitFailed, ConfigFileInitSucceeded, TsGoLintError, - TooManyFilesWithImportAndJsPlugins, } impl Termination for CliRunResult { @@ -38,8 +37,7 @@ impl Termination for CliRunResult { | Self::InvalidOptionSeverityWithoutFilter | Self::InvalidOptionSeverityWithoutPluginName | Self::InvalidOptionSeverityWithoutRuleName - | Self::TsGoLintError - | Self::TooManyFilesWithImportAndJsPlugins => ExitCode::FAILURE, + | Self::TsGoLintError => ExitCode::FAILURE, } } } diff --git a/crates/oxc_allocator/src/pool/fixed_size.rs b/crates/oxc_allocator/src/pool/fixed_size.rs index 4e4b7bdd2a219..f6f9bbc579d83 100644 --- a/crates/oxc_allocator/src/pool/fixed_size.rs +++ b/crates/oxc_allocator/src/pool/fixed_size.rs @@ -134,10 +134,6 @@ impl FixedSizeAllocatorPool { return None; } - // Protect against IDs wrapping around. - // TODO: Does this work? Do we need it anyway? - assert!(id < u32::MAX, "Created too many allocators"); - // Try to claim this ID. If another thread got there first, retry with new ID. if self .next_id diff --git a/crates/oxc_linter/src/lib.rs b/crates/oxc_linter/src/lib.rs index e3acc1a417d55..d3000a92f2d74 100644 --- a/crates/oxc_linter/src/lib.rs +++ b/crates/oxc_linter/src/lib.rs @@ -7,7 +7,7 @@ use std::{ rc::Rc, }; -use oxc_allocator::Allocator; +use oxc_allocator::{Allocator, AllocatorPool, CloneIn}; use oxc_ast::{ast::Program, ast_kind::AST_TYPE_MAX}; use oxc_ast_macros::ast; use oxc_ast_visit::utf8_to_utf16::Utf8ToUtf16; @@ -148,11 +148,16 @@ impl Linter { context_sub_hosts: Vec>, allocator: &'a Allocator, ) -> Vec { - self.run_with_disable_directives(path, context_sub_hosts, allocator).0 + self.run_with_disable_directives(path, context_sub_hosts, allocator, None).0 } /// Same as `run` but also returns the disable directives for the file /// + /// # Parameters + /// - `js_allocator_pool`: Optional pool of fixed-size allocators for copying AST before JS transfer. + /// When `Some`, the AST will be copied into a fixed-size allocator before passing to JS plugins, + /// allowing the main allocator to be a standard (non-fixed-size) allocator. + /// /// # Panics /// Panics in debug mode if running with and without optimizations produces different diagnostic counts. pub fn run_with_disable_directives<'a>( @@ -160,6 +165,7 @@ impl Linter { path: &Path, context_sub_hosts: Vec>, allocator: &'a Allocator, + js_allocator_pool: Option<&AllocatorPool>, ) -> (Vec, Option) { let ResolvedLinterState { rules, config, external_rules } = self.config.resolve(path); @@ -350,7 +356,13 @@ impl Linter { // can mutably access `ctx_host` via `Rc::get_mut` without panicking due to multiple references. drop(rules); - self.run_external_rules(&external_rules, path, &mut ctx_host, allocator); + self.run_external_rules( + &external_rules, + path, + &mut ctx_host, + allocator, + js_allocator_pool, + ); // Report unused directives is now handled differently with type-aware linting @@ -382,65 +394,157 @@ impl Linter { (diagnostics, disable_directives) } + #[cfg(all(target_pointer_width = "64", target_endian = "little"))] fn run_external_rules<'a>( &self, external_rules: &[(ExternalRuleId, ExternalOptionsId, AllowWarnDeny)], path: &Path, ctx_host: &mut Rc>, allocator: &'a Allocator, + js_allocator_pool: Option<&AllocatorPool>, ) { if external_rules.is_empty() { return; } - // `external_linter` always exists when `external_rules` is not empty - let external_linter = self.external_linter.as_ref().unwrap(); + // If `js_allocator_pool` is provided, use clone-into-fixed-allocator approach + if let Some(js_pool) = js_allocator_pool { + self.clone_into_fixed_size_allocator_and_run_external_rules( + external_rules, + path, + ctx_host, + js_pool, + ); + return; + } + + // Extract `Semantic` from `ContextHost`, and get a mutable reference to `Program`. + // + // It's not possible to obtain a `&mut Program` while `Semantic` exists, because `Semantic` + // contains `AstNodes`, which contains `AstKind`s for every AST nodes, each of which contains + // an immutable `&` ref to an AST node. + // Obtaining a `&mut Program` while `Semantic` exists would be illegal aliasing. + // + // So instead we get a pointer to `Program`. + // The pointer is obtained initially from `&Program` in `Semantic`, but that pointer + // has no provenance for mutation, so can't be converted to `&mut Program`. + // So create a new pointer to `Program` which inherits `data_end_ptr`'s provenance, + // which does allow mutation. + // + // We then drop `Semantic`, after which no references to any AST nodes remain. + // We can then safety convert the pointer to `&mut Program`. + // + // `Program` was created in `allocator`, and that allocator is a `FixedSizeAllocator`, + // so only has 1 chunk. So `data_end_ptr` and `Program` are within the same allocation. + // All callers of `Linter::run` obtain `allocator` and `Semantic` from `ModuleContent`, + // which ensure they are in same allocation. + // However, we have no static guarantee of this, so strictly speaking it's unsound. + // TODO: It would be better to avoid the need for a `&mut Program` here, and so avoid this + // sketchy behavior. + let ctx_host = Rc::get_mut(ctx_host).unwrap(); + let semantic = mem::take(ctx_host.semantic_mut()); + let program_addr = NonNull::from(semantic.nodes().program()).addr(); + let mut program_ptr = + allocator.data_end_ptr().cast::>().with_addr(program_addr); + drop(semantic); + // SAFETY: Now that we've dropped `Semantic`, no references to any AST nodes remain, + // so can get a mutable reference to `Program` without aliasing violations. + let program = unsafe { program_ptr.as_mut() }; + + self.convert_and_call_external_linter(external_rules, path, ctx_host, allocator, program); + } + + #[cfg(not(all(target_pointer_width = "64", target_endian = "little")))] + fn run_external_rules<'a>( + &self, + _external_rules: &[(ExternalRuleId, ExternalOptionsId, AllowWarnDeny)], + _path: &Path, + _ctx_host: &mut Rc>, + _allocator: &'a Allocator, + _js_allocator_pool: Option<&AllocatorPool>, + ) { + // External rules (JS plugins) are not supported on non-64-bit or big-endian platforms + } - let (program_offset, source_text, span_converter) = { - // Extract `Semantic` from `ContextHost`, and get a mutable reference to `Program`. - // - // It's not possible to obtain a `&mut Program` while `Semantic` exists, because `Semantic` - // contains `AstNodes`, which contains `AstKind`s for every AST nodes, each of which contains - // an immutable `&` ref to an AST node. - // Obtaining a `&mut Program` while `Semantic` exists would be illegal aliasing. - // - // So instead we get a pointer to `Program`. - // The pointer is obtained initially from `&Program` in `Semantic`, but that pointer - // has no provenance for mutation, so can't be converted to `&mut Program`. - // So create a new pointer to `Program` which inherits `data_end_ptr`'s provenance, - // which does allow mutation. - // - // We then drop `Semantic`, after which no references to any AST nodes remain. - // We can then safety convert the pointer to `&mut Program`. - // - // `Program` was created in `allocator`, and that allocator is a `FixedSizeAllocator`, - // so only has 1 chunk. So `data_end_ptr` and `Program` are within the same allocation. - // All callers of `Linter::run` obtain `allocator` and `Semantic` from `ModuleContent`, - // which ensure they are in same allocation. - // However, we have no static guarantee of this, so strictly speaking it's unsound. - // TODO: It would be better to avoid the need for a `&mut Program` here, and so avoid this - // sketchy behavior. - let ctx_host = Rc::get_mut(ctx_host).unwrap(); - let semantic = mem::take(ctx_host.semantic_mut()); - let program_addr = NonNull::from(semantic.nodes().program()).addr(); - let mut program_ptr = - allocator.data_end_ptr().cast::>().with_addr(program_addr); - drop(semantic); - // SAFETY: Now that we've dropped `Semantic`, no references to any AST nodes remain, - // so can get a mutable reference to `Program` without aliasing violations. - let program = unsafe { program_ptr.as_mut() }; - - // Convert spans to UTF-16 - let span_converter = Utf8ToUtf16::new(program.source_text); - span_converter.convert_program(program); - span_converter.convert_comments(&mut program.comments); - - // Get offset of `Program` within buffer (bottom 32 bits of pointer) - let program_offset = ptr::from_ref(program) as u32; - - (program_offset, program.source_text, span_converter) + /// Clone AST into a fixed-size allocator and run external rules. + /// + /// This copies the AST and source text from the standard allocator into a fixed-size + /// allocator before passing to JS plugins. This allows using standard allocators for + /// parsing/linting while still supporting JS plugin raw transfer. + #[cfg(all(target_pointer_width = "64", target_endian = "little"))] + fn clone_into_fixed_size_allocator_and_run_external_rules( + &self, + external_rules: &[(ExternalRuleId, ExternalOptionsId, AllowWarnDeny)], + path: &Path, + ctx_host: &mut Rc>, + js_allocator_pool: &AllocatorPool, + ) { + let js_allocator = js_allocator_pool.get(); + + // Get the original source text and program from semantic + let ctx_host_ref = Rc::get_mut(ctx_host).unwrap(); + let semantic = mem::take(ctx_host_ref.semantic_mut()); + let original_program = semantic.nodes().program(); + let original_source_text = original_program.source_text; + + // Copy source text to the START of the fixed-size allocator. + // This is critical - the JS deserializer expects source text at offset 0. + // SAFETY: `js_allocator` is from a fixed-size allocator pool, which wraps the allocator + // in a custom `Drop` that doesn't actually drop it (it returns it to the pool), so the + // memory remains valid. This matches the safety requirements of `alloc_bytes_start`. + let new_source_text: &str = unsafe { + let bytes = original_source_text.as_bytes(); + let ptr = js_allocator.alloc_bytes_start(bytes.len()); + ptr::copy_nonoverlapping(bytes.as_ptr(), ptr.as_ptr(), bytes.len()); + std::str::from_utf8_unchecked(std::slice::from_raw_parts(ptr.as_ptr(), bytes.len())) }; + // Clone `Program` into fixed-size allocator. + // We need to allocate the `Program` struct ITSELF in the allocator, not just its contents. + // `clone_in` returns a value on the stack, but we need it in the allocator for raw transfer. + let program: &mut Program<'_> = { + let mut program = original_program.clone_in(&js_allocator); + program.source_text = new_source_text; + js_allocator.alloc(program) + }; + + // Drop semantic now that we've copied what we need + drop(semantic); + + self.convert_and_call_external_linter( + external_rules, + path, + ctx_host_ref, + &js_allocator, + program, + ); + + // The `AllocatorGuard` (`js_allocator`) is dropped here, returning the allocator to the pool. + // This ensures that we never have too many allocators in play at once, avoiding OOM. + } + + /// Convert spans to UTF-16, write metadata, call external linter, and process diagnostics. + /// + /// This is the common code path shared by both `run_external_rules` and + /// `clone_into_fixed_size_allocator_and_run_external_rules`. + fn convert_and_call_external_linter( + &self, + external_rules: &[(ExternalRuleId, ExternalOptionsId, AllowWarnDeny)], + path: &Path, + ctx_host: &ContextHost<'_>, + allocator: &Allocator, + program: &mut Program<'_>, + ) { + let source_text = program.source_text; + + // Convert spans to UTF-16 + let span_converter = Utf8ToUtf16::new(source_text); + span_converter.convert_program(program); + span_converter.convert_comments(&mut program.comments); + + // Get offset of `Program` within buffer (bottom 32 bits of pointer) + let program_offset = ptr::from_ref(program) as u32; + // Write offset of `Program` in metadata at end of buffer let metadata = RawTransferMetadata::new(program_offset); let metadata_ptr = allocator.end_ptr().cast::(); @@ -470,6 +574,9 @@ impl Linter { "{}".to_string() }); + // `external_linter` always exists when `external_rules` is not empty + let external_linter = self.external_linter.as_ref().unwrap(); + // Pass AST and rule IDs + options IDs to JS let result = (external_linter.lint_file)( path.to_owned(), diff --git a/crates/oxc_linter/src/service/runtime.rs b/crates/oxc_linter/src/service/runtime.rs index 8946ff9065035..ef7d6d71d00c6 100644 --- a/crates/oxc_linter/src/service/runtime.rs +++ b/crates/oxc_linter/src/service/runtime.rs @@ -45,8 +45,15 @@ pub struct Runtime { pub(super) linter: Linter, resolver: Option, + /// Pool of allocators for parsing and linting. allocator_pool: AllocatorPool, + /// Separate pool of fixed-size allocators for copying AST before JS transfer. + /// Only created when using the copy-to-fixed-allocator approach + /// (both import plugin and JS plugins enabled). + #[cfg(all(target_pointer_width = "64", target_endian = "little"))] + js_allocator_pool: Option, + /// The module graph keyed by module paths. It is looked up when populating `loaded_modules`. /// The values are module records of sections (check the docs of `ProcessedModule.section_module_records`) /// Its entries are kept across groups because modules discovered in former groups could be referenced by modules in latter groups. @@ -209,20 +216,42 @@ impl Runtime { let thread_count = rayon::current_num_threads(); - // If an external linter is used (JS plugins), we must use fixed-size allocators, - // for compatibility with raw transfer - let allocator_pool = if linter.has_external_linter() { - AllocatorPool::new_fixed_size(thread_count) + // Create allocator pools. + // + // * If both JS plugins and import plugin enabled, use copy-to-fixed-allocator approach. + // This approach is to use standard allocators for parsing/linting (lower memory usage), + // and copy ASTs to a fixed-size allocator only when passing to JS plugins. + // + // * If JS plugins, but no import plugin, use fixed-size allocators for everything. + // Without import plugin, there's no danger of memory exhaustion, as no more than + // ASTs are live at any given time. + // + // * If no JS plugins, use standard allocators for parsing/linting. + #[cfg(all(target_pointer_width = "64", target_endian = "little"))] + let (allocator_pool, js_allocator_pool) = if linter.has_external_linter() { + if options.cross_module { + ( + AllocatorPool::new(thread_count), + Some(AllocatorPool::new_fixed_size(thread_count)), + ) + } else { + (AllocatorPool::new_fixed_size(thread_count), None) + } } else { - AllocatorPool::new(thread_count) + (AllocatorPool::new(thread_count), None) }; + #[cfg(not(all(target_pointer_width = "64", target_endian = "little")))] + let allocator_pool = AllocatorPool::new(thread_count); + let resolver = options.cross_module.then(|| { Self::get_resolver(options.tsconfig.or_else(|| Some(options.cwd.join("tsconfig.json")))) }); Self { allocator_pool, + #[cfg(all(target_pointer_width = "64", target_endian = "little"))] + js_allocator_pool, cwd: options.cwd, linter, resolver, @@ -598,9 +627,18 @@ impl Runtime { return; } - let (mut messages, disable_directives) = me - .linter - .run_with_disable_directives(path, context_sub_hosts, allocator_guard); + #[cfg(all(target_pointer_width = "64", target_endian = "little"))] + let js_pool = me.js_allocator_pool.as_ref(); + #[cfg(not(all(target_pointer_width = "64", target_endian = "little")))] + let js_pool = None::<&AllocatorPool>; + + let (mut messages, disable_directives) = + me.linter.run_with_disable_directives( + path, + context_sub_hosts, + allocator_guard, + js_pool, + ); // Store the disable directives for this file if let Some(disable_directives) = disable_directives { @@ -712,9 +750,15 @@ impl Runtime { } let path = Path::new(&module_to_lint.path); + + #[cfg(all(target_pointer_width = "64", target_endian = "little"))] + let js_pool = me.js_allocator_pool.as_ref(); + #[cfg(not(all(target_pointer_width = "64", target_endian = "little")))] + let js_pool = None::<&AllocatorPool>; + let (section_messages, disable_directives) = me .linter - .run_with_disable_directives(path, context_sub_hosts, allocator_guard); + .run_with_disable_directives(path, context_sub_hosts, allocator_guard, js_pool); if let Some(disable_directives) = disable_directives { me.disable_directives_map