diff --git a/Cargo.lock b/Cargo.lock index 6076f66bc3e8d..cd25bfe085523 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2496,6 +2496,7 @@ version = "1.12.0" dependencies = [ "bpaf", "cow-utils", + "futures", "ignore", "insta", "lazy-regex", @@ -2512,6 +2513,7 @@ dependencies = [ "serde_json", "simdutf8", "tempfile", + "tokio", "tracing-subscriber", ] diff --git a/apps/oxlint/Cargo.toml b/apps/oxlint/Cargo.toml index 04a62b143f89c..b8fe3e6d0c650 100644 --- a/apps/oxlint/Cargo.toml +++ b/apps/oxlint/Cargo.toml @@ -34,6 +34,7 @@ oxc_span = { workspace = true } bpaf = { workspace = true, features = ["autocomplete", "bright-color", "derive"] } cow-utils = { workspace = true } +futures = { workspace = true, optional = true } ignore = { workspace = true, features = ["simd-accel"] } miette = { workspace = true } napi = { workspace = true } @@ -43,6 +44,7 @@ serde = { workspace = true } serde_json = { workspace = true } simdutf8 = { workspace = true, optional = true } tempfile = { workspace = true } +tokio = { workspace = true, optional = true } tracing-subscriber = { workspace = true, features = [] } # Omit the `regex` feature [target.'cfg(not(any(target_os = "linux", target_os = "freebsd", target_arch = "arm", target_family = "wasm")))'.dependencies] @@ -61,6 +63,6 @@ lazy-regex = { workspace = true } [features] default = [] allocator = ["dep:mimalloc-safe"] -oxlint2 = ["oxc_linter/oxlint2", "oxc_allocator/fixed_size", "dep:simdutf8"] +oxlint2 = ["oxc_linter/oxlint2", "oxc_allocator/fixed_size", "dep:futures", "dep:simdutf8", "dep:tokio"] disable_oxlint2 = ["oxc_linter/disable_oxlint2", "oxc_allocator/disable_fixed_size"] force_test_reporter = ["oxc_linter/force_test_reporter"] diff --git a/apps/oxlint/src/lib.rs b/apps/oxlint/src/lib.rs index 9c16eb32dfd57..9c7fb727324a6 100644 --- a/apps/oxlint/src/lib.rs +++ b/apps/oxlint/src/lib.rs @@ -1,8 +1,9 @@ use std::io::BufWriter; pub use oxc_linter::{ - ExternalLinter, ExternalLinterLintFileCb, ExternalLinterLoadPluginCb, LintFileResult, - PluginLoadResult, + ExternalLinter, ExternalLinterInitWorkerThreadsCb, ExternalLinterLintFileCb, + ExternalLinterLoadPluginCb, ExternalLinterLoadPluginsCb, ExternalLinterWorkerCallbacks, + LintFileResult, PluginLoadResult, }; mod command; diff --git a/apps/oxlint/src/lint.rs b/apps/oxlint/src/lint.rs index 98f30adcef49a..65c76f38186f2 100644 --- a/apps/oxlint/src/lint.rs +++ b/apps/oxlint/src/lint.rs @@ -289,6 +289,13 @@ impl LintRunner { let (mut diagnostic_service, tx_error) = Self::get_diagnostic_service(&output_formatter, &warning_options, &misc_options); + #[allow(unused_mut, clippy::allow_attributes)] + let mut external_linter = self.external_linter; + #[cfg(all(feature = "oxlint2", not(feature = "disable_oxlint2")))] + { + Self::init_js_worker_threads(external_linter.as_mut(), &external_plugin_store); + } + let config_store = ConfigStore::new(lint_config, nested_configs, external_plugin_store); let files_to_lint = paths @@ -308,7 +315,7 @@ impl LintRunner { } } - let linter = Linter::new(LintOptions::default(), config_store, self.external_linter) + let linter = Linter::new(LintOptions::default(), config_store, external_linter) .with_fix(fix_options.fix_kind()) .with_report_unused_directives(report_unused_directives); @@ -372,6 +379,52 @@ impl LintRunner { CliRunResult::LintSucceeded } } + + #[cfg(all(feature = "oxlint2", not(feature = "disable_oxlint2")))] + fn init_js_worker_threads( + external_linter: Option<&mut ExternalLinter>, + external_plugin_store: &ExternalPluginStore, + ) { + use futures::future::try_join_all; + + // If no JS plugins used, exit + if external_plugin_store.plugin_paths().is_empty() { + return; + } + + // Start 1 JS worker thread for each rayon thread + let external_linter = external_linter.unwrap(); + let thread_count = u32::try_from(rayon::current_num_threads()).unwrap(); + + let callbacks = external_linter.init_worker_threads(thread_count); + + // Load all plugins on each worker thread + let plugin_paths = external_plugin_store.plugin_paths().iter().cloned().collect::>(); + + tokio::task::block_in_place(|| { + tokio::runtime::Handle::current().block_on({ + let callbacks = &*callbacks; + async move { + let futures = callbacks + .iter() + .map(|callbacks| (callbacks.load_plugins)(plugin_paths.clone())); + + let joined = try_join_all(futures).await; + joined.map(|_| ()) + } + }) + }) + .unwrap(); + + // Store JS functions to lint a file in `ExternalLinter`. + // 1 function for each worker thread. + let lint_file_on_threads = callbacks + .into_iter() + .map(|callbacks| callbacks.lint_file) + .collect::>() + .into_boxed_slice(); + external_linter.set_lint_file_on_threads(lint_file_on_threads); + } } impl LintRunner { diff --git a/crates/oxc_linter/Cargo.toml b/crates/oxc_linter/Cargo.toml index 760efc182b541..810ec62fcfc76 100644 --- a/crates/oxc_linter/Cargo.toml +++ b/crates/oxc_linter/Cargo.toml @@ -72,7 +72,7 @@ serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true, features = ["preserve_order"] } # preserve_order: print config with ordered keys. simdutf8 = { workspace = true } smallvec = { workspace = true } -tokio = { workspace = true, optional = true } +tokio = { workspace = true, features = ["rt-multi-thread"], optional = true } [dev-dependencies] insta = { workspace = true } diff --git a/crates/oxc_linter/src/config/config_builder.rs b/crates/oxc_linter/src/config/config_builder.rs index 7d7b0b680590a..53febcba2b2d2 100644 --- a/crates/oxc_linter/src/config/config_builder.rs +++ b/crates/oxc_linter/src/config/config_builder.rs @@ -533,17 +533,12 @@ impl ConfigStoreBuilder { return Ok(()); } - let result = { - let plugin_path = plugin_path.clone(); - tokio::task::block_in_place(move || { - tokio::runtime::Handle::current() - .block_on((external_linter.load_plugin)(plugin_path)) - }) - .map_err(|e| ConfigBuilderError::PluginLoadFailed { + let result = external_linter.load_plugin(&plugin_path).map_err(|error| { + ConfigBuilderError::PluginLoadFailed { plugin_specifier: plugin_specifier.to_string(), - error: e.to_string(), - }) - }?; + error, + } + })?; match result { PluginLoadResult::Success { name, offset, rule_names } => { diff --git a/crates/oxc_linter/src/external_linter.rs b/crates/oxc_linter/src/external_linter.rs index 284e5512a75c4..5ef569009bf8b 100644 --- a/crates/oxc_linter/src/external_linter.rs +++ b/crates/oxc_linter/src/external_linter.rs @@ -4,22 +4,45 @@ use serde::{Deserialize, Serialize}; use oxc_allocator::Allocator; -pub type ExternalLinterLoadPluginCb = Arc< +/// Callback functions for a JS worker thread. +pub struct ExternalLinterWorkerCallbacks { + pub load_plugins: ExternalLinterLoadPluginsCb, + pub lint_file: ExternalLinterLintFileCb, +} + +/// Initialize JS worker threads. +pub type ExternalLinterInitWorkerThreadsCb = Arc< dyn Fn( - String, + u32, ) -> Pin< - Box< - dyn Future< - Output = Result>, - > + Send, - >, + Box, String>> + Send>, > + Send + Sync + 'static, >; -pub type ExternalLinterLintFileCb = - Arc, &Allocator) -> Result, String> + Sync + Send>; +/// Load a JS plugin on main thread. +pub type ExternalLinterLoadPluginCb = Arc< + dyn Fn(String) -> Pin> + Send>> + + Send + + Sync + + 'static, +>; + +/// Load multiple JS plugins on a worker thread. +pub type ExternalLinterLoadPluginsCb = Arc< + dyn Fn(Vec) -> Pin> + Send>> + + Send + + Sync + + 'static, +>; + +/// Lint a file on a worker thread. +pub type ExternalLinterLintFileCb = Arc< + dyn Fn(String, Vec, &Allocator, usize) -> Result, String> + + Sync + + Send, +>; #[derive(Clone, Debug, Deserialize, Serialize)] pub enum PluginLoadResult { @@ -49,16 +72,90 @@ pub struct Loc { #[derive(Clone)] #[cfg_attr(not(all(feature = "oxlint2", not(feature = "disable_oxlint2"))), expect(dead_code))] pub struct ExternalLinter { - pub(crate) load_plugin: ExternalLinterLoadPluginCb, - pub(crate) lint_file: ExternalLinterLintFileCb, + init_worker_threads: ExternalLinterInitWorkerThreadsCb, + load_plugin: ExternalLinterLoadPluginCb, + lint_file_fns: Box<[ExternalLinterLintFileCb]>, } impl ExternalLinter { pub fn new( + init_worker_threads: ExternalLinterInitWorkerThreadsCb, load_plugin: ExternalLinterLoadPluginCb, - lint_file: ExternalLinterLintFileCb, ) -> Self { - Self { load_plugin, lint_file } + Self { init_worker_threads, load_plugin, lint_file_fns: Box::new([]) } + } + + /// Initialize JS worker threads. + /// + /// # Panics + /// + /// Panics if either: + /// * The current thread is not a Tokio runtime thread. + /// * The JS worker threads failed to initialize. + #[cfg(all(feature = "oxlint2", not(feature = "disable_oxlint2")))] + pub fn init_worker_threads(&self, thread_count: u32) -> Box<[ExternalLinterWorkerCallbacks]> { + tokio::task::block_in_place(|| { + tokio::runtime::Handle::current().block_on((self.init_worker_threads)(thread_count)) + }) + .unwrap() + } + + #[cfg(not(all(feature = "oxlint2", not(feature = "disable_oxlint2"))))] + #[expect(unused_variables, clippy::unused_self)] + pub fn init_worker_threads(&self, thread_count: u32) -> Box<[ExternalLinterWorkerCallbacks]> { + unreachable!(); + } + + /// Set the lint file callbacks for each worker thread. + pub fn set_lint_file_on_threads( + &mut self, + lint_file_on_threads: Box<[ExternalLinterLintFileCb]>, + ) { + self.lint_file_fns = lint_file_on_threads; + } + + /// Load a JS plugin on main thread. + /// + /// # Errors + /// Returns `Err` if error on JS side. + /// + /// # Panics + /// Panics if the current thread is not a Tokio runtime thread. + #[cfg(all(feature = "oxlint2", not(feature = "disable_oxlint2")))] + pub fn load_plugin(&self, plugin_path: &str) -> Result { + let plugin_path = plugin_path.to_string(); + tokio::task::block_in_place(move || { + tokio::runtime::Handle::current().block_on((self.load_plugin)(plugin_path)) + }) + } + + #[cfg(not(all(feature = "oxlint2", not(feature = "disable_oxlint2"))))] + #[expect(unused_variables, clippy::unused_self, clippy::missing_errors_doc)] + pub fn load_plugin(&self, plugin_path: &str) -> Result { + unreachable!(); + } + + /// Lint a file. + /// + /// # Errors + /// + /// Returns `Err` if: + /// * Error on JS side. + /// * Current thread is not a Rayon thread. + #[expect(clippy::unnecessary_safety_comment)] + pub fn lint_file( + &self, + file_path: String, + rule_ids: Vec, + allocator: &Allocator, + ) -> Result, String> { + let thread_id = rayon::current_thread_index() + .ok_or_else(|| String::from("Current thread must be a Rayon thread"))?; + + let lint_file = &self.lint_file_fns[thread_id]; + // SAFETY: We have verified that the current thread is a Rayon thread. + // `rayon::current_thread_index()` always returns a number less than the number of threads in pool. + lint_file(file_path, rule_ids, allocator, thread_id) } } diff --git a/crates/oxc_linter/src/external_plugin_store.rs b/crates/oxc_linter/src/external_plugin_store.rs index dbae90ebfdc83..8f9131c211f2f 100644 --- a/crates/oxc_linter/src/external_plugin_store.rs +++ b/crates/oxc_linter/src/external_plugin_store.rs @@ -1,9 +1,12 @@ use std::fmt; -use rustc_hash::{FxHashMap, FxHashSet}; +use indexmap::IndexSet; +use rustc_hash::{FxBuildHasher, FxHashMap}; use oxc_index::{IndexVec, define_index_type}; +type FxIndexSet = IndexSet; + define_index_type! { pub struct ExternalPluginId = u32; } @@ -14,7 +17,7 @@ define_index_type! { #[derive(Debug, Default)] pub struct ExternalPluginStore { - registered_plugin_paths: FxHashSet, + registered_plugin_paths: FxIndexSet, plugins: IndexVec, plugin_names: FxHashMap, @@ -22,6 +25,10 @@ pub struct ExternalPluginStore { } impl ExternalPluginStore { + pub fn plugin_paths(&self) -> &FxIndexSet { + &self.registered_plugin_paths + } + pub fn is_plugin_registered(&self, plugin_path: &str) -> bool { self.registered_plugin_paths.contains(plugin_path) } diff --git a/crates/oxc_linter/src/lib.rs b/crates/oxc_linter/src/lib.rs index bea97b3b7bcc2..31a36b8ccc07a 100644 --- a/crates/oxc_linter/src/lib.rs +++ b/crates/oxc_linter/src/lib.rs @@ -46,8 +46,9 @@ pub use crate::{ }, context::LintContext, external_linter::{ - ExternalLinter, ExternalLinterLintFileCb, ExternalLinterLoadPluginCb, LintFileResult, - PluginLoadResult, + ExternalLinter, ExternalLinterInitWorkerThreadsCb, ExternalLinterLintFileCb, + ExternalLinterLoadPluginCb, ExternalLinterLoadPluginsCb, ExternalLinterWorkerCallbacks, + LintFileResult, PluginLoadResult, }, external_plugin_store::{ExternalPluginStore, ExternalRuleId}, fixer::FixKind, @@ -260,7 +261,7 @@ impl Linter { unsafe { metadata_ptr.write(metadata) }; // Pass AST and rule IDs to JS - let result = (external_linter.lint_file)( + let result = external_linter.lint_file( path.to_str().unwrap().to_string(), external_rules.iter().map(|(rule_id, _)| rule_id.raw()).collect(), allocator, diff --git a/napi/oxlint2/src-js/assertIs.ts b/napi/oxlint2/src-js/assertIs.ts new file mode 100644 index 0000000000000..5ad18087d88d1 --- /dev/null +++ b/napi/oxlint2/src-js/assertIs.ts @@ -0,0 +1,10 @@ +/** + * Assert a value is of a certain type. + * + * Has no runtime effect - only for guiding the type-checker. + * Minification removes this function and all calls to it, so it has zero runtime cost. + * + * @param value - Value + */ +// oxlint-disable-next-line no-unused-vars +export default function assertIs(value: unknown): asserts value is T {} diff --git a/napi/oxlint2/src-js/bindings.d.ts b/napi/oxlint2/src-js/bindings.d.ts index e4326c134a2a2..07314640fc904 100644 --- a/napi/oxlint2/src-js/bindings.d.ts +++ b/napi/oxlint2/src-js/bindings.d.ts @@ -1,9 +1,22 @@ /* auto-generated by NAPI-RS */ /* eslint-disable */ +/** Initialize JS worker threads. */ +export type JsInitWorkerThreadsCb = + ((arg: number) => Promise) + +/** Lint a file on a worker thread. */ export type JsLintFileCb = ((arg0: string, arg1: number, arg2: Uint8Array | undefined | null, arg3: Array) => string) +/** Load a JS plugin on main thread. */ export type JsLoadPluginCb = ((arg: string) => Promise) -export declare function lint(loadPlugin: JsLoadPluginCb, lintFile: JsLintFileCb): Promise +/** Load multiple JS plugins on a worker thread. */ +export type JsLoadPluginsCb = + ((arg: Array) => Promise) + +export declare function lint(initWorkerThreads: JsInitWorkerThreadsCb, loadPlugin: JsLoadPluginCb): Promise + +/** Register a JS worker thread. */ +export declare function registerWorker(loadPlugins: JsLoadPluginsCb, lintFile: JsLintFileCb): void diff --git a/napi/oxlint2/src-js/bindings.js b/napi/oxlint2/src-js/bindings.js index ee7b497d72d70..e23459a0dc74c 100644 --- a/napi/oxlint2/src-js/bindings.js +++ b/napi/oxlint2/src-js/bindings.js @@ -393,5 +393,6 @@ if (!nativeBinding) { throw new Error(`Failed to load native binding`) } -const { lint } = nativeBinding +const { lint, registerWorker } = nativeBinding export { lint } +export { registerWorker } diff --git a/napi/oxlint2/src-js/context.ts b/napi/oxlint2/src-js/context.ts new file mode 100644 index 0000000000000..7c18bf4391761 --- /dev/null +++ b/napi/oxlint2/src-js/context.ts @@ -0,0 +1,90 @@ +import type { Node, Visitor } from './types.ts'; + +// Linter plugin +export interface Plugin { + meta: { + name: string; + }; + rules: { + [key: string]: Rule; + }; +} + +// Linter rule +export interface Rule { + create: (context: Context) => Visitor; +} + +// Diagnostic passed to `Context#report` +export interface Diagnostic { + message: string; + node: Node; +} + +// Diagnostic as stored internally in `diagnostics` +export interface DiagnosticReport { + message: string; + loc: { start: number; end: number }; + ruleIndex: number; +} + +// Diagnostics array. Reused for every file. +export const diagnostics: DiagnosticReport[] = []; + +/** + * Update a `Context` with file-specific data. + * + * We have to define this function within class body, as it's not possible to set private property + * `#ruleIndex` from outside the class. + * We don't use a normal class method, because we don't want to expose this to user. + * + * @param context - `Context` object + * @param ruleIndex - Index of this rule within `ruleIds` passed from Rust + * @param filePath - Absolute path of file being linted + */ +export let setupContextForFile: (context: Context, ruleIndex: number, filePath: string) => void; + +/** + * Context class. + * + * Each rule has its own `Context` object. It is passed to that rule's `create` function. + */ +export class Context { + // Full rule name, including plugin name e.g. `my-plugin/my-rule`. + id: string; + // Index into `ruleIds` sent from Rust. Set before calling `rule`'s `create` method. + #ruleIndex: number; + // Absolute path of file being linted. Set before calling `rule`'s `create` method. + filename: string; + // Absolute path of file being linted. Set before calling `rule`'s `create` method. + physicalFilename: string; + + /** + * @class + * @param fullRuleName - Rule name, in form `/` + */ + constructor(fullRuleName: string) { + this.id = fullRuleName; + } + + /** + * Report error. + * @param diagnostic - Diagnostic object + */ + report(diagnostic: Diagnostic): void { + const { node } = diagnostic; + diagnostics.push({ + message: diagnostic.message, + loc: { start: node.start, end: node.end }, + ruleIndex: this.#ruleIndex, + }); + } + + static { + setupContextForFile = (context, ruleIndex, filePath) => { + context.#ruleIndex = ruleIndex; + context.filename = filePath; + context.physicalFilename = filePath; + }; + } +} diff --git a/napi/oxlint2/src-js/index.ts b/napi/oxlint2/src-js/index.ts index 0f2f2d0a43160..fcfcfbf8b2645 100644 --- a/napi/oxlint2/src-js/index.ts +++ b/napi/oxlint2/src-js/index.ts @@ -1,69 +1,27 @@ -import { createRequire } from 'node:module'; +import { Worker } from 'node:worker_threads'; import { lint } from './bindings.js'; -import { - DATA_POINTER_POS_32, - SOURCE_LEN_OFFSET, - // TODO(camc314): we need to generate `.d.ts` file for this module. - // @ts-expect-error -} from './generated/constants.cjs'; -import { assertIs, getErrorMessage } from './utils.js'; -import { addVisitorToCompiled, compiledVisitor, finalizeCompiledVisitor, initCompiledVisitor } from './visitor.js'; -import type { Visitor } from './types.ts'; +import type { Plugin } from './context.ts'; -// Import methods and objects from `oxc-parser`. -// Use `require` not `import` as `oxc-parser` uses `require` internally, -// and need to make sure get same instance of modules as it uses internally, -// otherwise `TOKEN` here won't be same `TOKEN` as used within `oxc-parser`. -const require = createRequire(import.meta.url); -const { TOKEN } = require('../dist/parser/raw-transfer/lazy-common.cjs'), - walkProgram = require('../dist/parser/generated/lazy/walk.cjs'); +const WORKER_URL = new URL('./worker.js', import.meta.url); + +const LOG = false; // -------------------- -// Plugin loading +// Plugin registration // -------------------- -interface Diagnostic { - message: string; - node: { - start: number; - end: number; - [key: string]: unknown; - }; -} - -interface DiagnosticReport { - message: string; - loc: { start: number; end: number }; - ruleIndex: number; -} - -interface Rule { - create: (context: Context) => Visitor; -} - -interface Plugin { - meta: { - name: string; - }; - rules: { - [key: string]: Rule; - }; -} - // Absolute paths of plugins which have been loaded const registeredPluginPaths = new Set(); -// Rule objects for loaded rules. -// Indexed by `ruleId`, passed to `lintFile`. -const registeredRules: { - rule: Rule; - context: Context; -}[] = []; +// Count of rules registered so far +let registeredRuleCount = 0; /** * Load a plugin. * + * Called from Rust. + * * Main logic is in separate function `loadPluginImpl`, because V8 cannot optimize functions * containing try/catch. * @@ -90,179 +48,79 @@ async function loadPluginImpl(path: string): Promise { registeredPluginPaths.add(path); // TODO: Use a validation library to assert the shape of the plugin, and of rules - const pluginName = plugin.meta.name; - const offset = registeredRules.length; - const ruleNames = []; + const offset = registeredRuleCount; + const ruleNames = Object.keys(plugin.rules); + registeredRuleCount += ruleNames.length; - for (const [ruleName, rule] of Object.entries(plugin.rules)) { - ruleNames.push(ruleName); - registeredRules.push({ - rule, - context: new Context(`${pluginName}/${ruleName}`), - }); - } - - return JSON.stringify({ Success: { name: pluginName, offset, ruleNames } }); + return JSON.stringify({ Success: { name: plugin.meta.name, offset, ruleNames } }); } /** - * Update a `Context` with file-specific data. + * Get error message from an error. * - * We have to define this function within class body, as it's not possible to set private property - * `#ruleIndex` from outside the class. - * We don't use a normal class method, because we don't want to expose this to user. + * `err` is expected to be an `Error` object, but can be anything. * - * @param context - `Context` object - * @param ruleIndex - Index of this rule within `ruleIds` passed from Rust - * @param filePath - Absolute path of file being linted - */ -let setupContextForFile: ( - context: Context, - ruleIndex: number, - filePath: string, -) => void; - -/** - * Context class. + * This function will never throw, and always returns a string, even if: + * + * * `err` is `null` or `undefined`. + * * `err` is an object with a getter for `message` property which throws. + * * `err` has a getter for `message` property which returns a different value each time it's accessed. * - * Each rule has its own `Context` object. It is passed to that rule's `create` function. + * @param err - Error + * @returns Error message */ -class Context { - // Full rule name, including plugin name e.g. `my-plugin/my-rule`. - id: string; - // Index into `ruleIds` sent from Rust. Set before calling `rule`'s `create` method. - #ruleIndex: number; - // Absolute path of file being linted. Set before calling `rule`'s `create` method. - filename: string; - // Absolute path of file being linted. Set before calling `rule`'s `create` method. - physicalFilename: string; - - /** - * @class - * @param fullRuleName - Rule name, in form `/` - */ - constructor(fullRuleName: string) { - this.id = fullRuleName; - } - - /** - * Report error. - * @param diagnostic - Diagnostic object - */ - report(diagnostic: Diagnostic): void { - diagnostics.push({ - message: diagnostic.message, - loc: { start: diagnostic.node.start, end: diagnostic.node.end }, - ruleIndex: this.#ruleIndex, - }); - } +function getErrorMessage(err: unknown): string { + try { + const { message } = err as undefined | { message: string }; + if (typeof message === 'string' && message !== '') return message; + } catch {} - static { - setupContextForFile = (context, ruleIndex, filePath) => { - context.#ruleIndex = ruleIndex; - context.filename = filePath; - context.physicalFilename = filePath; - }; - } + return 'Unknown error'; } // -------------------- -// Running rules +// Worker threads // -------------------- -interface BufferWithArrays extends Uint8Array { - uint32: Uint32Array; - float64: Float64Array; -} - -// Buffers cache. -// -// All buffers sent from Rust are stored in this array, indexed by `bufferId` (also sent from Rust). -// Buffers are only added to this array, never removed, so no buffers will be garbage collected -// until the process exits. -const buffers: (BufferWithArrays | null)[] = []; - -// Diagnostics array. Reused for every file. -const diagnostics: DiagnosticReport[] = []; - -// Text decoder, for decoding source text from buffer -const textDecoder = new TextDecoder('utf-8', { ignoreBOM: true }); - -// Run rules on a file. -function lintFile(filePath: string, bufferId: number, buffer: Uint8Array | null, ruleIds: number[]) { - // If new buffer, add it to `buffers` array. Otherwise, get existing buffer from array. - // Do this before checks below, to make sure buffer doesn't get garbage collected when not expected - // if there's an error. - // TODO: Is this enough to guarantee soundness? - if (buffer === null) { - // Rust will only send a `bufferId` alone, if it previously sent a buffer with this same ID - buffer = buffers[bufferId]!; - } else { - assertIs(buffer); - const { buffer: arrayBuffer, byteOffset } = buffer; - buffer.uint32 = new Uint32Array(arrayBuffer, byteOffset); - buffer.float64 = new Float64Array(arrayBuffer, byteOffset); +const workers: Worker[] = []; - for (let i = bufferId - buffers.length; i >= 0; i--) { - buffers.push(null); +/** + * Initialize worker threads. + * + * Called from Rust. + * + * @param {number} threadCount - Number of worker threads to create + * @returns {undefined} + */ +async function initWorkerThreads(threadCount: number): Promise { + // if (LOG) console.log('> Starting', threadCount, 'workers'); + + return new Promise((resolve) => { + let remainingCount = threadCount; + function done(_: any): void { + if (--remainingCount === 0) { + resolve(void 0); + // if (LOG) console.log('> Started', threadCount, 'workers'); + } } - buffers[bufferId] = buffer; - } - assertIs(buffer); - if (typeof filePath !== 'string' || filePath.length === 0) { - throw new Error('expected filePath to be a non-zero length string'); - } - if (!Array.isArray(ruleIds) || ruleIds.length === 0) { - throw new Error('Expected `ruleIds` to be a non-zero len array'); - } - - // Get visitors for this file from all rules - initCompiledVisitor(); - for (let i = 0; i < ruleIds.length; i++) { - const ruleId = ruleIds[i]; - const { rule, context } = registeredRules[ruleId]; - setupContextForFile(context, i, filePath); - const visitor = rule.create(context); - addVisitorToCompiled(visitor); - } - const needsVisit = finalizeCompiledVisitor(); - - // Visit AST. - // Skip this if no visitors visit any nodes. - // Some rules seen in the wild return an empty visitor object from `create` if some initial check fails - // e.g. file extension is not one the rule acts on. - if (needsVisit) { - const { uint32 } = buffer, - programPos = uint32[DATA_POINTER_POS_32], - sourceByteLen = uint32[(programPos + SOURCE_LEN_OFFSET) >> 2]; - - const sourceText = textDecoder.decode(buffer.subarray(0, sourceByteLen)); - const sourceIsAscii = sourceText.length === sourceByteLen; - const ast = { - buffer, - sourceText, - sourceByteLen, - sourceIsAscii, - nodes: new Map(), - token: TOKEN, - }; - - walkProgram(programPos, ast, compiledVisitor); - } - - // Send diagnostics back to Rust - const ret = JSON.stringify(diagnostics); - diagnostics.length = 0; - return ret; + for (let id = 0; id < threadCount; id++) { + const worker = new Worker(WORKER_URL, { workerData: { id, LOG } }); + worker.addListener('message', done); + workers.push(worker); + } + }); } // -------------------- // Run linter // -------------------- -// Call Rust, passing `loadPlugin` and `lintFile` as callbacks -const success = await lint(loadPlugin, lintFile); +// Call Rust, passing `initWorkerThreads`, and `loadPlugin` as callbacks +const success = await lint(initWorkerThreads, loadPlugin); + +// Terminate worker threads +await Promise.all(workers.map(worker => worker.terminate())); // Note: It's recommended to set `process.exitCode` instead of calling `process.exit()`. // `process.exit()` kills the process immediately and `stdout` may not be flushed before process dies. diff --git a/napi/oxlint2/src-js/utils.ts b/napi/oxlint2/src-js/utils.ts deleted file mode 100644 index 96d8c48267f68..0000000000000 --- a/napi/oxlint2/src-js/utils.ts +++ /dev/null @@ -1,33 +0,0 @@ -/** - * Get error message from an error. - * - * `err` is expected to be an `Error` object, but can be anything. - * - * This function will never throw, and always returns a string, even if: - * - * * `err` is `null` or `undefined`. - * * `err` is an object with a getter for `message` property which throws. - * * `err` has a getter for `message` property which returns a different value each time it's accessed. - * - * @param err - Error - * @returns Error message - */ -export function getErrorMessage(err: unknown): string { - try { - const { message } = err as undefined | { message: string }; - if (typeof message === 'string' && message !== '') return message; - } catch {} - - return 'Unknown error'; -} - -/** - * Assert a value is of a certain type. - * - * Has no runtime effect - only for guiding the type-checker. - * Minification removes this function and all calls to it, so it has zero runtime cost. - * - * @param value - Value - */ -// oxlint-disable-next-line no-unused-vars -export function assertIs(value: unknown): asserts value is T {} diff --git a/napi/oxlint2/src-js/visitor.ts b/napi/oxlint2/src-js/visitor.ts index 635a1542671f4..600219ff80913 100644 --- a/napi/oxlint2/src-js/visitor.ts +++ b/napi/oxlint2/src-js/visitor.ts @@ -75,7 +75,7 @@ // TODO(camc314): we need to generate `.d.ts` file for this module. // @ts-expect-error import { LEAF_NODE_TYPES_COUNT, NODE_TYPE_IDS_MAP, NODE_TYPES_COUNT } from '../dist/parser/generated/lazy/types.cjs'; -import { assertIs } from './utils.js'; +import assertIs from './assertIs.js'; import type { CompiledVisitorEntry, EnterExit, Node, VisitFn, Visitor } from './types.ts'; diff --git a/napi/oxlint2/src-js/worker.ts b/napi/oxlint2/src-js/worker.ts new file mode 100644 index 0000000000000..c487e58f151fb --- /dev/null +++ b/napi/oxlint2/src-js/worker.ts @@ -0,0 +1,171 @@ +import { createRequire } from 'node:module'; +import { parentPort, workerData } from 'node:worker_threads'; +import assertIs from './assertIs.js'; +import { registerWorker } from './bindings.js'; +import { Context, diagnostics, setupContextForFile } from './context.js'; +import { + DATA_POINTER_POS_32, + SOURCE_LEN_OFFSET, + // TODO(camc314): we need to generate `.d.ts` file for this module. + // @ts-expect-error +} from './generated/constants.cjs'; +import { addVisitorToCompiled, compiledVisitor, finalizeCompiledVisitor, initCompiledVisitor } from './visitor.js'; + +import type { Plugin, Rule } from './context.ts'; + +// Import methods and objects from `oxc-parser`. +// Use `require` not `import` as `oxc-parser` uses `require` internally, +// and need to make sure get same instance of modules as it uses internally, +// otherwise `TOKEN` here won't be same `TOKEN` as used within `oxc-parser`. +const require = createRequire(import.meta.url); +const { TOKEN } = require('../dist/parser/raw-transfer/lazy-common.cjs'), + walkProgram = require('../dist/parser/generated/lazy/walk.cjs'); + +// Register this worker with Rust +const { id: _workerId, LOG: _LOG } = workerData; + +// if (LOG) console.log('> Starting worker', workerId); + +registerWorker(loadPlugins, lintFile); + +parentPort.postMessage(null); + +// -------------------- +// Plugin loading +// -------------------- + +interface RuleAndContext { + rule: Rule; + context: Context; +} + +// Rule objects for loaded rules. +// Indexed by `ruleId`, passed to `lintFile`. +const registeredRules: RuleAndContext[] = []; + +/** + * Load plugins. + * + * @param paths - Array of absolute paths to plugin files + * @returns {Promise} + */ +async function loadPlugins(paths: string[]): Promise { + // if (LOG) console.log('> Loading plugins in worker', workerId); + + const ruleSets: RuleAndContext[][] = await Promise.all(paths.map(loadPlugin)); + + for (const rules of ruleSets) { + registeredRules.push(...rules); + } + + // if (LOG) console.log('> Loaded plugins in worker', workerId, '-', registeredRules.length, 'rules'); +} + +/** + * Load a plugin. + * + * @param {string} path - Absolute path of plugin file + * @returns {RuleAndContext[]} - JSON result + */ +async function loadPlugin(path: string): Promise { + // if (LOG) console.log('> Loading plugin in worker', workerId, path); + + const { default: plugin } = (await import(path)) as { default: Plugin }; + + // if (LOG) console.log('> Loaded plugin in worker', workerId, path); + + const pluginName = plugin.meta.name; + return Object.entries(plugin.rules).map( + ([ruleName, rule]) => ({ rule, context: new Context(`${pluginName}/${ruleName}`) }), + ); +} + +// -------------------- +// Running rules +// -------------------- + +interface BufferWithArrays extends Uint8Array { + uint32: Uint32Array; + float64: Float64Array; +} + +// Buffers cache. +// +// All buffers sent from Rust are stored in this array, indexed by `bufferId` (also sent from Rust). +// Buffers are only added to this array, never removed, so no buffers will be garbage collected +// until the process exits. +const buffers: (BufferWithArrays | null)[] = []; + +// Text decoder, for decoding source text from buffer +const textDecoder = new TextDecoder('utf-8', { ignoreBOM: true }); + +// Run rules on a file. +function lintFile(filePath: string, bufferId: number, buffer: Uint8Array | null, ruleIds: number[]): string { + // if (LOG) console.log('> Linting file in worker', workerId, filePath); + + // If new buffer, add it to `buffers` array. Otherwise, get existing buffer from array. + // Do this before checks below, to make sure buffer doesn't get garbage collected when not expected + // if there's an error. + // TODO: Is this enough to guarantee soundness? + if (buffer === null) { + // Rust will only send a `bufferId` alone, if it previously sent a buffer with this same ID + buffer = buffers[bufferId]!; + } else { + assertIs(buffer); + const { buffer: arrayBuffer, byteOffset } = buffer; + buffer.uint32 = new Uint32Array(arrayBuffer, byteOffset); + buffer.float64 = new Float64Array(arrayBuffer, byteOffset); + + for (let i = bufferId - buffers.length; i >= 0; i--) { + buffers.push(null); + } + buffers[bufferId] = buffer; + } + assertIs(buffer); + + if (typeof filePath !== 'string' || filePath.length === 0) { + throw new Error('expected filePath to be a non-zero length string'); + } + if (!Array.isArray(ruleIds) || ruleIds.length === 0) { + throw new Error('Expected `ruleIds` to be a non-zero len array'); + } + + // Get visitors for this file from all rules + initCompiledVisitor(); + for (let i = 0; i < ruleIds.length; i++) { + const ruleId = ruleIds[i]; + const { rule, context } = registeredRules[ruleId]; + setupContextForFile(context, i, filePath); + const visitor = rule.create(context); + addVisitorToCompiled(visitor); + } + const needsVisit = finalizeCompiledVisitor(); + + // Visit AST. + // Skip this if no visitors visit any nodes. + // Some rules seen in the wild return an empty visitor object from `create` if some initial check fails + // e.g. file extension is not one the rule acts on. + if (needsVisit) { + const { uint32 } = buffer, + programPos = uint32[DATA_POINTER_POS_32], + sourceByteLen = uint32[(programPos + SOURCE_LEN_OFFSET) >> 2]; + + const sourceText = textDecoder.decode(buffer.subarray(0, sourceByteLen)); + const sourceIsAscii = sourceText.length === sourceByteLen; + const ast = { + buffer, + sourceText, + sourceByteLen, + sourceIsAscii, + nodes: new Map(), + token: TOKEN, + }; + + walkProgram(programPos, ast, compiledVisitor); + } + + // Send diagnostics back to Rust + const ret = JSON.stringify(diagnostics); + diagnostics.length = 0; + return ret; +} diff --git a/napi/oxlint2/src/lib.rs b/napi/oxlint2/src/lib.rs index ce96092d722df..a701a3e8d81e8 100644 --- a/napi/oxlint2/src/lib.rs +++ b/napi/oxlint2/src/lib.rs @@ -1,6 +1,7 @@ use std::{ + mem, process::{ExitCode, Termination}, - sync::{Arc, atomic::Ordering, mpsc::channel}, + sync::{Arc, Mutex, atomic::Ordering, mpsc::channel}, }; use napi::{ @@ -12,8 +13,9 @@ use napi_derive::napi; use oxc_allocator::{Allocator, free_fixed_size_allocator}; use oxlint::{ - ExternalLinter, ExternalLinterLintFileCb, ExternalLinterLoadPluginCb, LintFileResult, - PluginLoadResult, lint as oxlint_lint, + ExternalLinter, ExternalLinterInitWorkerThreadsCb, ExternalLinterLintFileCb, + ExternalLinterLoadPluginCb, ExternalLinterLoadPluginsCb, ExternalLinterWorkerCallbacks, + LintFileResult, PluginLoadResult, lint as oxlint_lint, }; mod generated { @@ -21,6 +23,22 @@ mod generated { } use generated::raw_transfer_constants::{BLOCK_ALIGN, BUFFER_SIZE}; +/// Initialize JS worker threads. +#[napi] +pub type JsInitWorkerThreadsCb = ThreadsafeFunction< + // Arguments + u32, // Number of threads + // Return value + Promise<()>, + // Arguments (repeated) + u32, + // Error status + Status, + // CalleeHandled + false, +>; + +/// Load a JS plugin on main thread. #[napi] pub type JsLoadPluginCb = ThreadsafeFunction< // Arguments @@ -35,6 +53,22 @@ pub type JsLoadPluginCb = ThreadsafeFunction< false, >; +/// Load multiple JS plugins on a worker thread. +#[napi] +pub type JsLoadPluginsCb = ThreadsafeFunction< + // Arguments + Vec, // Absolute paths of plugin files + // Return value + Promise<()>, + // Arguments (repeated) + Vec, + // Error status + Status, + // CalleeHandled + false, +>; + +/// Lint a file on a worker thread. #[napi] pub type JsLintFileCb = ThreadsafeFunction< // Arguments @@ -54,62 +88,139 @@ pub type JsLintFileCb = ThreadsafeFunction< false, >; +/// Callback functions for worker threads. +static REGISTERED_WORKERS: Mutex> = Mutex::new(Vec::new()); + +/// Register a JS worker thread. +#[napi] +pub fn register_worker(load_plugins: JsLoadPluginsCb, lint_file: JsLintFileCb) { + let callbacks = ExternalLinterWorkerCallbacks { + load_plugins: wrap_load_plugins(load_plugins), + lint_file: wrap_lint_file(lint_file), + }; + #[expect(clippy::missing_panics_doc)] + REGISTERED_WORKERS.lock().unwrap().push(callbacks); +} + +fn wrap_init_worker_threads(cb: JsInitWorkerThreadsCb) -> ExternalLinterInitWorkerThreadsCb { + let cb = Arc::new(cb); + Arc::new(move |thread_count| { + Box::pin({ + let cb = Arc::clone(&cb); + async move { + REGISTERED_WORKERS.lock().unwrap().reserve(thread_count as usize); + + cb.call_async(thread_count) + .await + .map_err(|e| e.to_string())? + .into_future() + .await + .map_err(|e| e.to_string())?; + + let callbacks_vec = { + let mut guard = REGISTERED_WORKERS.lock().unwrap(); + mem::take(&mut *guard) + }; + + if callbacks_vec.len() != thread_count as usize { + return Err(format!( + "Expected {} JS worker threads to be initialized, but only {} were", + thread_count, + callbacks_vec.len() + )); + } + + Ok(callbacks_vec.into_boxed_slice()) + } + }) + }) +} + fn wrap_load_plugin(cb: JsLoadPluginCb) -> ExternalLinterLoadPluginCb { let cb = Arc::new(cb); Arc::new(move |plugin_name| { Box::pin({ let cb = Arc::clone(&cb); async move { - let result = cb.call_async(plugin_name).await?.into_future().await?; - let plugin_load_result: PluginLoadResult = serde_json::from_str(&result)?; + let result = cb + .call_async(plugin_name) + .await + .map_err(|e| e.to_string())? + .into_future() + .await + .map_err(|e| e.to_string())?; + let plugin_load_result: PluginLoadResult = + serde_json::from_str(&result).map_err(|e| e.to_string())?; Ok(plugin_load_result) } }) }) } +fn wrap_load_plugins(cb: JsLoadPluginsCb) -> ExternalLinterLoadPluginsCb { + let cb = Arc::new(cb); + Arc::new(move |plugin_names| { + Box::pin({ + let cb = Arc::clone(&cb); + async move { + cb.call_async(plugin_names) + .await + .map_err(|e| e.to_string())? + .into_future() + .await + .map_err(|e| e.to_string())?; + Ok(()) + } + }) + }) +} + fn wrap_lint_file(cb: JsLintFileCb) -> ExternalLinterLintFileCb { let cb = Arc::new(cb); - Arc::new(move |file_path: String, rule_ids: Vec, allocator: &Allocator| { - let cb = Arc::clone(&cb); - - let (tx, rx) = channel(); - - // At present, JS plugins all run on main thread, so `thread_id` is always 0 - let thread_id = 0; - - // SAFETY: This crate enables the `fixed_size` feature on `oxc_allocator`, - // so all AST `Allocator`s are created via `FixedSizeAllocator`. - // `thread_id` is valid. There there's always at least 1 thread, so 0 cannot be too high. - let (buffer_id, buffer) = unsafe { get_buffer(allocator, thread_id) }; - - // Send data to JS - let status = cb.call_with_return_value( - FnArgs::from((file_path, buffer_id, buffer, rule_ids)), - ThreadsafeFunctionCallMode::NonBlocking, - move |result, _env| { - let _ = match &result { - Ok(r) => match serde_json::from_str::>(r) { - Ok(v) => tx.send(Ok(v)), - Err(_e) => tx.send(Err("Failed to deserialize lint result".to_string())), - }, - Err(e) => tx.send(Err(e.to_string())), - }; + Arc::new( + // TODO: There's no way to mark a closure as unsafe. Need to find another way. + // Like a `ThreadId` wrapper type? + // SAFETY: `thread_id` must be less than the number of threads in the Rayon global thread pool. + move |file_path: String, rule_ids: Vec, allocator: &Allocator, thread_id: usize| { + let cb = Arc::clone(&cb); - result.map(|_| ()) - }, - ); + let (tx, rx) = channel(); - if status != Status::Ok { - return Err(format!("Failed to schedule callback: {status:?}")); - } + // SAFETY: This crate enables the `fixed_size` feature on `oxc_allocator`, + // so all AST `Allocator`s are created via `FixedSizeAllocator`. + // Caller guarantees that `thread_id` is less than number of threads in Rayon global thread pool. + let (buffer_id, buffer) = unsafe { get_buffer(allocator, thread_id) }; - match rx.recv() { - Ok(Ok(x)) => Ok(x), - Ok(Err(e)) => Err(format!("Callback reported error: {e}")), - Err(e) => Err(format!("Callback did not respond: {e}")), - } - }) + // Send data to JS + let status = cb.call_with_return_value( + FnArgs::from((file_path, buffer_id, buffer, rule_ids)), + ThreadsafeFunctionCallMode::NonBlocking, + move |result, _env| { + let _ = match &result { + Ok(r) => match serde_json::from_str::>(r) { + Ok(v) => tx.send(Ok(v)), + Err(_e) => { + tx.send(Err("Failed to deserialize lint result".to_string())) + } + }, + Err(e) => tx.send(Err(e.to_string())), + }; + + result.map(|_| ()) + }, + ); + + if status != Status::Ok { + return Err(format!("Failed to schedule callback: {status:?}")); + } + + match rx.recv() { + Ok(Ok(x)) => Ok(x), + Ok(Err(e)) => Err(format!("Callback reported error: {e}")), + Err(e) => Err(format!("Callback did not respond: {e}")), + } + }, + ) } /// Get buffer ID of the `Allocator` and, if it hasn't already been sent to this JS thread, @@ -217,10 +328,10 @@ unsafe fn get_buffer( #[expect(clippy::allow_attributes)] #[allow(clippy::trailing_empty_array, clippy::unused_async)] // https://github.com/napi-rs/napi-rs/issues/2758 #[napi] -pub async fn lint(load_plugin: JsLoadPluginCb, lint_file: JsLintFileCb) -> bool { +pub async fn lint(init_worker_threads: JsInitWorkerThreadsCb, load_plugin: JsLoadPluginCb) -> bool { + let rust_init_worker_threads = wrap_init_worker_threads(init_worker_threads); let rust_load_plugin = wrap_load_plugin(load_plugin); - let rust_lint_file = wrap_lint_file(lint_file); - oxlint_lint(Some(ExternalLinter::new(rust_load_plugin, rust_lint_file))).report() + oxlint_lint(Some(ExternalLinter::new(rust_init_worker_threads, rust_load_plugin))).report() == ExitCode::SUCCESS } diff --git a/napi/oxlint2/tsdown.config.ts b/napi/oxlint2/tsdown.config.ts index 6b965f5e49a2f..e65a4bf40a957 100644 --- a/napi/oxlint2/tsdown.config.ts +++ b/napi/oxlint2/tsdown.config.ts @@ -1,7 +1,7 @@ import { defineConfig } from 'tsdown'; export default defineConfig({ - entry: ['src-js/index.ts'], + entry: ['src-js/index.ts', 'src-js/worker.ts'], format: ['esm'], platform: 'node', target: 'node20',