From ea908f742d8165e8a01aad5e3b2e68b3f6953dc0 Mon Sep 17 00:00:00 2001 From: DonIsaac <22823424+DonIsaac@users.noreply.github.com> Date: Sun, 29 Sep 2024 02:48:01 +0000 Subject: [PATCH] refactor(linter): consolidate file loading logic (#6130) # Human Description Low on time, so this one is short. - consolidate source file and partial loader logic into `loader` module. I have more plans for this. - ~LSP no longer uses `VALID_EXTENSIONS`, so now `.d.ts` files (and the like) will be linted as well~ LSP does not respect `.gitignore` files, so this change was reverted. # AI Description ## Refactor Loader and Partial Loader This PR refactors the loader and partial loader functionality in the oxc_linter crate: * Introduce a new `Loader` struct with methods for checking if a file can be loaded and loading file contents * Move `partial_loader` module to `loader/partial_loader` * Rename `JavaScriptSource` to `source.rs` and move it to the `loader` module * Update `JavaScriptSource` to use `u32` for `start` offset instead of `usize` * Refactor `IsolatedLintHandler` to use the new `Loader` * Update imports and module references throughout the codebase This change improves the organization of the loader-related code and provides a more unified interface for loading different file types. --- Cargo.lock | 1 + apps/oxlint/src/lint/mod.rs | 4 +- crates/oxc_language_server/Cargo.toml | 14 ++- crates/oxc_language_server/src/linter.rs | 112 +++++++----------- crates/oxc_linter/src/lib.rs | 2 +- crates/oxc_linter/src/loader/mod.rs | 107 +++++++++++++++++ .../src/{ => loader}/partial_loader/astro.rs | 12 +- .../src/{ => loader}/partial_loader/mod.rs | 22 +--- .../src/{ => loader}/partial_loader/svelte.rs | 8 +- .../src/{ => loader}/partial_loader/vue.rs | 4 +- crates/oxc_linter/src/loader/source.rs | 33 ++++++ .../src/rules/unicorn/no_empty_file.rs | 2 +- crates/oxc_linter/src/service.rs | 6 +- 13 files changed, 219 insertions(+), 108 deletions(-) create mode 100644 crates/oxc_linter/src/loader/mod.rs rename crates/oxc_linter/src/{ => loader}/partial_loader/astro.rs (93%) rename crates/oxc_linter/src/{ => loader}/partial_loader/mod.rs (68%) rename crates/oxc_linter/src/{ => loader}/partial_loader/svelte.rs (87%) rename crates/oxc_linter/src/{ => loader}/partial_loader/vue.rs (96%) create mode 100644 crates/oxc_linter/src/loader/source.rs diff --git a/Cargo.lock b/Cargo.lock index 70b1f8983e107..7dec3ed221c82 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1621,6 +1621,7 @@ dependencies = [ "oxc_semantic", "oxc_span", "ropey", + "rustc-hash", "serde", "serde_json", "tokio", diff --git a/apps/oxlint/src/lint/mod.rs b/apps/oxlint/src/lint/mod.rs index 778f5b08088cd..abbd0ed9a73ad 100644 --- a/apps/oxlint/src/lint/mod.rs +++ b/apps/oxlint/src/lint/mod.rs @@ -3,8 +3,8 @@ use std::{env, io::BufWriter, time::Instant}; use ignore::gitignore::Gitignore; use oxc_diagnostics::{DiagnosticService, GraphicalReportHandler}; use oxc_linter::{ - partial_loader::LINT_PARTIAL_LOADER_EXT, AllowWarnDeny, InvalidFilterKind, LintFilter, - LintService, LintServiceOptions, Linter, OxlintOptions, + loader::LINT_PARTIAL_LOADER_EXT, AllowWarnDeny, InvalidFilterKind, LintFilter, LintService, + LintServiceOptions, Linter, OxlintOptions, }; use oxc_span::VALID_EXTENSIONS; diff --git a/crates/oxc_language_server/Cargo.toml b/crates/oxc_language_server/Cargo.toml index 2761994970e15..7de85362918de 100644 --- a/crates/oxc_language_server/Cargo.toml +++ b/crates/oxc_language_server/Cargo.toml @@ -22,19 +22,21 @@ test = false doctest = false [dependencies] -dashmap = { workspace = true } -env_logger = { workspace = true, features = ["humantime"] } -futures = { workspace = true } -globset = { workspace = true } -ignore = { workspace = true, features = ["simd-accel"] } -log = { workspace = true } oxc_allocator = { workspace = true } oxc_diagnostics = { workspace = true } oxc_linter = { workspace = true } oxc_parser = { workspace = true } oxc_semantic = { workspace = true } oxc_span = { workspace = true } + +dashmap = { workspace = true } +env_logger = { workspace = true, features = ["humantime"] } +futures = { workspace = true } +globset = { workspace = true } +ignore = { workspace = true, features = ["simd-accel"] } +log = { workspace = true } ropey = { workspace = true } +rustc-hash = { workspace = true } serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true } tokio = { workspace = true, features = ["full"] } diff --git a/crates/oxc_language_server/src/linter.rs b/crates/oxc_language_server/src/linter.rs index c8a4114951abf..9ac26e6966ef7 100644 --- a/crates/oxc_language_server/src/linter.rs +++ b/crates/oxc_language_server/src/linter.rs @@ -1,24 +1,23 @@ +use oxc_linter::loader::LINT_PARTIAL_LOADER_EXT; use std::{ fs, path::{Path, PathBuf}, rc::Rc, - sync::Arc, + sync::{Arc, OnceLock}, }; use log::debug; use oxc_allocator::Allocator; use oxc_diagnostics::{Error, NamedSource, Severity}; use oxc_linter::{ - partial_loader::{ - AstroPartialLoader, JavaScriptSource, SveltePartialLoader, VuePartialLoader, - LINT_PARTIAL_LOADER_EXT, - }, + loader::{JavaScriptSource, Loader}, FixKind, Linter, }; use oxc_parser::{ParseOptions, Parser}; use oxc_semantic::SemanticBuilder; -use oxc_span::{SourceType, VALID_EXTENSIONS}; +use oxc_span::VALID_EXTENSIONS; use ropey::Rope; +use rustc_hash::FxHashSet; use tower_lsp::lsp_types::{ self, DiagnosticRelatedInformation, DiagnosticSeverity, Position, Range, Url, }; @@ -154,11 +153,12 @@ pub struct FixedContent { pub struct IsolatedLintHandler { linter: Arc, + loader: Loader, } impl IsolatedLintHandler { pub fn new(linter: Arc) -> Self { - Self { linter } + Self { linter, loader: Loader } } pub fn run_single( @@ -166,8 +166,8 @@ impl IsolatedLintHandler { path: &Path, content: Option, ) -> Option> { - if Self::is_wanted_ext(path) { - Some(Self::lint_path(&self.linter, path, content).map_or(vec![], |(p, errors)| { + if Self::should_lint_path(path) { + Some(self.lint_path(path, content).map_or(vec![], |(p, errors)| { let mut diagnostics: Vec = errors.into_iter().map(|e| e.into_diagnostic_report(&p)).collect(); // a diagnostics connected from related_info to original diagnostic @@ -212,62 +212,33 @@ impl IsolatedLintHandler { } } - fn is_wanted_ext(path: &Path) -> bool { - let extensions = get_valid_extensions(); - path.extension().map_or(false, |ext| extensions.contains(&ext.to_string_lossy().as_ref())) - } - - fn get_source_type_and_text( + fn lint_path( + &self, path: &Path, source_text: Option, - ext: &str, - ) -> Option<(SourceType, String)> { - let source_type = SourceType::from_path(path); - let not_supported_yet = - source_type.as_ref().is_err_and(|_| !LINT_PARTIAL_LOADER_EXT.contains(&ext)); - if not_supported_yet { - debug!("extension {ext} not supported yet."); + ) -> Option<(PathBuf, Vec)> { + if !Loader::can_load(path) { + debug!("extension not supported yet."); return None; } - let source_type = source_type.unwrap_or_default(); let source_text = source_text.map_or_else( || fs::read_to_string(path).unwrap_or_else(|_| panic!("Failed to read {path:?}")), |source_text| source_text, ); - - Some((source_type, source_text)) - } - - fn may_need_extract_js_content<'a>( - source_text: &'a str, - ext: &str, - ) -> Option>> { - match ext { - "vue" => Some(VuePartialLoader::new(source_text).parse()), - "astro" => Some(AstroPartialLoader::new(source_text).parse()), - "svelte" => Some(SveltePartialLoader::new(source_text).parse()), - _ => None, - } - } - - fn lint_path( - linter: &Linter, - path: &Path, - source_text: Option, - ) -> Option<(PathBuf, Vec)> { - let ext = path.extension().and_then(std::ffi::OsStr::to_str)?; - let (source_type, original_source_text) = - Self::get_source_type_and_text(path, source_text, ext)?; - let javascript_sources = Self::may_need_extract_js_content(&original_source_text, ext) - .unwrap_or_else(|| { - vec![JavaScriptSource { source_text: &original_source_text, source_type, start: 0 }] - }); + let javascript_sources = match self.loader.load_str(path, &source_text) { + Ok(s) => s, + Err(e) => { + debug!("failed to load {path:?}: {e}"); + return None; + } + }; debug!("lint {path:?}"); let mut diagnostics = vec![]; for source in javascript_sources { - let JavaScriptSource { source_text: javascript_source_text, source_type, start } = - source; + let JavaScriptSource { + source_text: javascript_source_text, source_type, start, .. + } = source; let allocator = Allocator::default(); let ret = Parser::new(&allocator, javascript_source_text, source_type) .with_options(ParseOptions { @@ -285,7 +256,7 @@ impl IsolatedLintHandler { fixed_content: None, }) .collect(); - return Some(Self::wrap_diagnostics(path, &original_source_text, reports, start)); + return Some(Self::wrap_diagnostics(path, &source_text, reports, start)); }; let program = allocator.alloc(ret.program); @@ -304,10 +275,10 @@ impl IsolatedLintHandler { fixed_content: None, }) .collect(); - return Some(Self::wrap_diagnostics(path, &original_source_text, reports, start)); + return Some(Self::wrap_diagnostics(path, &source_text, reports, start)); }; - let result = linter.run(path, Rc::new(semantic_ret.semantic)); + let result = self.linter.run(path, Rc::new(semantic_ret.semantic)); let reports = result .into_iter() @@ -316,12 +287,12 @@ impl IsolatedLintHandler { code: f.content.to_string(), range: Range { start: offset_to_position( - f.span.start as usize + start, + (f.span.start + start) as usize, javascript_source_text, ) .unwrap_or_default(), end: offset_to_position( - f.span.end as usize + start, + (f.span.end + start) as usize, javascript_source_text, ) .unwrap_or_default(), @@ -332,18 +303,29 @@ impl IsolatedLintHandler { }) .collect::>(); let (_, errors_with_position) = - Self::wrap_diagnostics(path, &original_source_text, reports, start); + Self::wrap_diagnostics(path, &source_text, reports, start); diagnostics.extend(errors_with_position); } Some((path.to_path_buf(), diagnostics)) } + fn should_lint_path(path: &Path) -> bool { + static WANTED_EXTENSIONS: OnceLock> = OnceLock::new(); + let wanted_exts = WANTED_EXTENSIONS.get_or_init(|| { + VALID_EXTENSIONS.iter().chain(LINT_PARTIAL_LOADER_EXT.iter()).copied().collect() + }); + + path.extension() + .and_then(std::ffi::OsStr::to_str) + .map_or(false, |ext| wanted_exts.contains(ext)) + } + fn wrap_diagnostics( path: &Path, source_text: &str, reports: Vec, - start: usize, + start: u32, ) -> (PathBuf, Vec) { let source = Arc::new(NamedSource::new(path.to_string_lossy(), source_text.to_owned())); let diagnostics = reports @@ -353,7 +335,7 @@ impl IsolatedLintHandler { report.error.with_source_code(Arc::clone(&source)), source_text, report.fixed_content, - start, + start as usize, ) }) .collect(); @@ -361,14 +343,6 @@ impl IsolatedLintHandler { } } -fn get_valid_extensions() -> Vec<&'static str> { - VALID_EXTENSIONS - .iter() - .chain(LINT_PARTIAL_LOADER_EXT.iter()) - .copied() - .collect::>() -} - #[allow(clippy::cast_possible_truncation)] fn offset_to_position(offset: usize, source_text: &str) -> Option { let rope = Rope::from_str(source_text); diff --git a/crates/oxc_linter/src/lib.rs b/crates/oxc_linter/src/lib.rs index 723050863a754..b75e629d59ba2 100644 --- a/crates/oxc_linter/src/lib.rs +++ b/crates/oxc_linter/src/lib.rs @@ -18,7 +18,7 @@ mod rules; mod service; mod utils; -pub mod partial_loader; +pub mod loader; pub mod table; use std::{io::Write, path::Path, rc::Rc, sync::Arc}; diff --git a/crates/oxc_linter/src/loader/mod.rs b/crates/oxc_linter/src/loader/mod.rs new file mode 100644 index 0000000000000..f11f1f55969ac --- /dev/null +++ b/crates/oxc_linter/src/loader/mod.rs @@ -0,0 +1,107 @@ +mod partial_loader; +mod source; + +use std::{error::Error, fmt, path::Path}; + +use oxc_span::SourceType; + +pub use partial_loader::{PartialLoader, LINT_PARTIAL_LOADER_EXT}; +pub use source::JavaScriptSource; + +// TODO: use oxc_resolver::FileSystem. We can't do so until that crate exposes FileSystemOs +// externally. +#[derive(Default, Clone)] +pub struct Loader; + +impl Loader { + pub fn can_load>(path: P) -> bool { + let path = path.as_ref(); + SourceType::from_path(path).is_ok() + || path + .extension() + .and_then(std::ffi::OsStr::to_str) + .is_some_and(|ext| LINT_PARTIAL_LOADER_EXT.contains(&ext)) + } + + /// # Errors + /// - If the file is too large (> 4GB, or u32::MAX) + /// - If the file has no extension + /// - If the file extension is not supported + pub fn load_str<'a, P: AsRef>( + &self, + path: P, + source_text: &'a str, + ) -> Result>, LoadError> { + if source_text.len() > u32::MAX as usize { + return Err(LoadError::TooLarge); + } + + let path = path.as_ref(); + let ext = path.extension().ok_or(LoadError::NoExtension)?; + // file extension is not unicode, we definitely don't support it. + let ext = ext.to_str().ok_or_else(|| LoadError::unsupported(ext))?; + + // let source_type = SourceType::from_path(path); + if let Ok(source_type) = SourceType::from_path(path) { + Ok(vec![JavaScriptSource::new(source_text, source_type)]) + } else { + let partial = PartialLoader::parse(ext, source_text); + partial.ok_or_else(|| LoadError::UnsupportedFileType(ext.to_string())) + } + } +} + +#[derive(Debug, Clone)] +pub enum LoadError { + TooLarge, + NoExtension, + UnsupportedFileType(String), +} + +impl LoadError { + pub(super) fn unsupported(ext: &std::ffi::OsStr) -> Self { + Self::UnsupportedFileType(ext.to_string_lossy().to_string()) + } +} + +impl fmt::Display for LoadError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::TooLarge => write!(f, "file is too large. Only files up to 4GB are supported."), + Self::NoExtension => write!(f, "no extension"), + Self::UnsupportedFileType(ext) => write!(f, "unsupported file type: {ext}"), + } + } +} + +impl Error for LoadError {} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_loader_can_handle() { + let paths = [ + "foo.js", + "foo.jsx", + "foo.mjs", + "foo.cjs", + "foo.ts", + "foo.tsx", + "foo.mts", + "foo.cts", + "foo.d.ts", + "foo.d.tsx", + "foo.d.mts", + "foo.d.cts", + "foo.astro", + "foo.svelte", + "foo.vue", + ]; + + for path in paths { + assert!(Loader::can_load(path)); + } + } +} diff --git a/crates/oxc_linter/src/partial_loader/astro.rs b/crates/oxc_linter/src/loader/partial_loader/astro.rs similarity index 93% rename from crates/oxc_linter/src/partial_loader/astro.rs rename to crates/oxc_linter/src/loader/partial_loader/astro.rs index bf9f5f52b2b7e..c5199b53bf393 100644 --- a/crates/oxc_linter/src/partial_loader/astro.rs +++ b/crates/oxc_linter/src/loader/partial_loader/astro.rs @@ -1,7 +1,8 @@ use memchr::memmem::Finder; use oxc_span::{SourceType, Span}; -use super::{JavaScriptSource, SCRIPT_END, SCRIPT_START}; +use super::{SCRIPT_END, SCRIPT_START}; +use crate::loader::JavaScriptSource; const ASTRO_SPLIT: &str = "---"; @@ -43,7 +44,7 @@ impl<'a> AstroPartialLoader<'a> { let js_code = Span::new(start + ASTRO_SPLIT.len() as u32, end).source_text(self.source_text); - Some(JavaScriptSource::new(js_code, SourceType::ts(), start as usize)) + Some(JavaScriptSource::partial(js_code, SourceType::ts(), start)) } /// In .astro files, you can add client-side JavaScript by adding one (or more) `"; pub const LINT_PARTIAL_LOADER_EXT: &[&str] = &["vue", "astro", "svelte"]; -#[derive(Debug, Clone, Copy)] -pub struct JavaScriptSource<'a> { - pub source_text: &'a str, - pub source_type: SourceType, - /// The javascript source could be embedded in some file, - /// use `start` to record start offset of js block in the original file. - pub start: usize, -} - -impl<'a> JavaScriptSource<'a> { - pub fn new(source_text: &'a str, source_type: SourceType, start: usize) -> Self { - Self { source_text, source_type, start } - } -} - pub struct PartialLoader; impl PartialLoader { - /// Extract js section of specifial files. - /// Returns `None` if the specifial file does not have a js section. + /// Extract js section of special files. + /// Returns `None` if the special file does not have a js section. pub fn parse<'a>(ext: &str, source_text: &'a str) -> Option>> { match ext { "vue" => Some(VuePartialLoader::new(source_text).parse()), diff --git a/crates/oxc_linter/src/partial_loader/svelte.rs b/crates/oxc_linter/src/loader/partial_loader/svelte.rs similarity index 87% rename from crates/oxc_linter/src/partial_loader/svelte.rs rename to crates/oxc_linter/src/loader/partial_loader/svelte.rs index e7da4d236bafa..e88196c4ef4a9 100644 --- a/crates/oxc_linter/src/partial_loader/svelte.rs +++ b/crates/oxc_linter/src/loader/partial_loader/svelte.rs @@ -1,7 +1,8 @@ use memchr::memmem::Finder; use oxc_span::SourceType; -use super::{find_script_closing_angle, JavaScriptSource, SCRIPT_END, SCRIPT_START}; +use super::{find_script_closing_angle, SCRIPT_END, SCRIPT_START}; +use crate::loader::JavaScriptSource; pub struct SveltePartialLoader<'a> { source_text: &'a str, @@ -42,7 +43,10 @@ impl<'a> SveltePartialLoader<'a> { let source_text = &self.source_text[js_start..js_end]; let source_type = SourceType::mjs().with_typescript(is_ts); - Some(JavaScriptSource::new(source_text, source_type, js_start)) + + // NOTE: loader checked that source_text.len() is less than u32::MAX + #[allow(clippy::cast_possible_truncation)] + Some(JavaScriptSource::partial(source_text, source_type, js_start as u32)) } } diff --git a/crates/oxc_linter/src/partial_loader/vue.rs b/crates/oxc_linter/src/loader/partial_loader/vue.rs similarity index 96% rename from crates/oxc_linter/src/partial_loader/vue.rs rename to crates/oxc_linter/src/loader/partial_loader/vue.rs index 584751fd7ccc3..d4f9999e5e466 100644 --- a/crates/oxc_linter/src/partial_loader/vue.rs +++ b/crates/oxc_linter/src/loader/partial_loader/vue.rs @@ -57,7 +57,9 @@ impl<'a> VuePartialLoader<'a> { let source_text = &self.source_text[js_start..js_end]; let source_type = SourceType::mjs().with_typescript(is_ts).with_jsx(is_jsx); - Some(JavaScriptSource::new(source_text, source_type, js_start)) + // NOTE: loader checked that source_text.len() is less than u32::MAX + #[allow(clippy::cast_possible_truncation)] + Some(JavaScriptSource::partial(source_text, source_type, js_start as u32)) } } diff --git a/crates/oxc_linter/src/loader/source.rs b/crates/oxc_linter/src/loader/source.rs new file mode 100644 index 0000000000000..479fb8b77b899 --- /dev/null +++ b/crates/oxc_linter/src/loader/source.rs @@ -0,0 +1,33 @@ +use oxc_span::SourceType; + +#[derive(Debug, Clone, Copy)] +#[non_exhaustive] +pub struct JavaScriptSource<'a> { + pub source_text: &'a str, + pub source_type: SourceType, + /// The javascript source could be embedded in some file, + /// use `start` to record start offset of js block in the original file. + pub start: u32, + #[allow(dead_code)] + is_partial: bool, +} + +impl<'a> JavaScriptSource<'a> { + pub fn new(source_text: &'a str, source_type: SourceType) -> Self { + Self { source_text, source_type, start: 0, is_partial: false } + } + + pub fn partial(source_text: &'a str, source_type: SourceType, start: u32) -> Self { + Self { source_text, source_type, start, is_partial: true } + } + + pub fn as_str(&self) -> &'a str { + &self.source_text[(self.start as usize)..] + } +} + +impl AsRef for JavaScriptSource<'_> { + fn as_ref(&self) -> &str { + self.as_str() + } +} diff --git a/crates/oxc_linter/src/rules/unicorn/no_empty_file.rs b/crates/oxc_linter/src/rules/unicorn/no_empty_file.rs index 202212029e90f..dc928bf8dbc2a 100644 --- a/crates/oxc_linter/src/rules/unicorn/no_empty_file.rs +++ b/crates/oxc_linter/src/rules/unicorn/no_empty_file.rs @@ -4,7 +4,7 @@ use oxc_macros::declare_oxc_lint; use oxc_span::Span; use crate::{ - context::LintContext, partial_loader::LINT_PARTIAL_LOADER_EXT, rule::Rule, utils::is_empty_stmt, + context::LintContext, loader::LINT_PARTIAL_LOADER_EXT, rule::Rule, utils::is_empty_stmt, }; fn no_empty_file_diagnostic(span: Span) -> OxcDiagnostic { diff --git a/crates/oxc_linter/src/service.rs b/crates/oxc_linter/src/service.rs index a2305eef2b8be..3de44f380fff8 100644 --- a/crates/oxc_linter/src/service.rs +++ b/crates/oxc_linter/src/service.rs @@ -17,7 +17,7 @@ use rayon::{iter::ParallelBridge, prelude::ParallelIterator}; use rustc_hash::{FxHashMap, FxHashSet}; use crate::{ - partial_loader::{JavaScriptSource, PartialLoader, LINT_PARTIAL_LOADER_EXT}, + loader::{JavaScriptSource, PartialLoader, LINT_PARTIAL_LOADER_EXT}, utils::read_to_string, Fixer, Linter, Message, }; @@ -252,8 +252,8 @@ impl Runtime { let sources = PartialLoader::parse(ext, &source_text); let is_processed_by_partial_loader = sources.is_some(); - let sources = - sources.unwrap_or_else(|| vec![JavaScriptSource::new(&source_text, source_type, 0)]); + let sources = sources + .unwrap_or_else(|| vec![JavaScriptSource::partial(&source_text, source_type, 0)]); if sources.is_empty() { self.ignore_path(path);