From 608d637e4d3423db6beb68f7ac32ce6c7768e1a5 Mon Sep 17 00:00:00 2001 From: camchenry <1514176+camchenry@users.noreply.github.com> Date: Thu, 19 Sep 2024 22:40:01 +0000 Subject: [PATCH] perf(linter): use `aho-corasick` instead of `regex` for string matching in `jsx-a11y/img-redundant-alt` (#5892) Hypothesis: profiling shows that `Regex` creation takes a noticeable amount of time. The `regex` crate uses `aho-corasick` internally for string matching, which is all we need in some cases. In theory, we can save time by using the `aho-corasick` library directly, since we do not need the full regex syntax. --- Cargo.lock | 1 + Cargo.toml | 1 + crates/oxc_linter/Cargo.toml | 1 + .../src/rules/jsx_a11y/img_redundant_alt.rs | 30 ++++++++++++------- 4 files changed, 23 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 47e6742c90dac..78fbf6d025dc8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1627,6 +1627,7 @@ dependencies = [ name = "oxc_linter" version = "0.9.6" dependencies = [ + "aho-corasick", "bitflags 2.6.0", "convert_case", "cow-utils", diff --git a/Cargo.toml b/Cargo.toml index 592331a330472..787004f30f207 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -108,6 +108,7 @@ napi = "3.0.0-alpha.8" napi-build = "2.1.3" napi-derive = "3.0.0-alpha.7" +aho-corasick = "1.1.3" allocator-api2 = "0.2.18" assert-unchecked = "0.1.2" base64 = "0.22.1" diff --git a/crates/oxc_linter/Cargo.toml b/crates/oxc_linter/Cargo.toml index bd53c767a4409..985d3b00fe7fb 100644 --- a/crates/oxc_linter/Cargo.toml +++ b/crates/oxc_linter/Cargo.toml @@ -34,6 +34,7 @@ oxc_semantic = { workspace = true } oxc_span = { workspace = true, features = ["schemars", "serialize"] } oxc_syntax = { workspace = true } +aho-corasick = { workspace = true } bitflags = { workspace = true } convert_case = { workspace = true } cow-utils = { workspace = true } diff --git a/crates/oxc_linter/src/rules/jsx_a11y/img_redundant_alt.rs b/crates/oxc_linter/src/rules/jsx_a11y/img_redundant_alt.rs index d867ae65ba78a..5fa68623a2c95 100644 
--- a/crates/oxc_linter/src/rules/jsx_a11y/img_redundant_alt.rs +++ b/crates/oxc_linter/src/rules/jsx_a11y/img_redundant_alt.rs @@ -1,3 +1,4 @@ +use aho_corasick::AhoCorasick; use oxc_ast::{ ast::{JSXAttributeItem, JSXAttributeName, JSXAttributeValue, JSXExpression}, AstKind, @@ -5,7 +6,6 @@ use oxc_ast::{ use oxc_diagnostics::OxcDiagnostic; use oxc_macros::declare_oxc_lint; use oxc_span::{CompactStr, Span}; -use regex::{Regex, RegexBuilder}; use serde_json::Value; use crate::{ @@ -28,7 +28,7 @@ pub struct ImgRedundantAlt(Box); #[derive(Debug, Clone)] pub struct ImgRedundantAltConfig { types_to_validate: Vec, - redundant_words: Regex, + redundant_words: AhoCorasick, } impl std::ops::Deref for ImgRedundantAlt { @@ -45,21 +45,25 @@ impl Default for ImgRedundantAltConfig { fn default() -> Self { Self { types_to_validate: vec![CompactStr::new("img")], - redundant_words: Self::union(&REDUNDANT_WORDS).unwrap(), + redundant_words: AhoCorasick::builder() + .ascii_case_insensitive(true) + .build(REDUNDANT_WORDS) + .expect("Could not build AhoCorasick"), } } } impl ImgRedundantAltConfig { - fn new(types_to_validate: Vec<&str>, redundant_words: &[&str]) -> Result { + fn new( + types_to_validate: Vec<&str>, + redundant_words: &[&str], + ) -> Result { Ok(Self { types_to_validate: types_to_validate.into_iter().map(Into::into).collect(), - redundant_words: Self::union(redundant_words)?, + redundant_words: AhoCorasick::builder() + .ascii_case_insensitive(true) + .build(redundant_words)?, }) } - - fn union(strs: &[&str]) -> Result { - RegexBuilder::new(&format!(r"(?i)\b({})\b", strs.join("|"))).case_insensitive(true).build() - } } declare_oxc_lint!( @@ -191,7 +195,13 @@ impl Rule for ImgRedundantAlt { impl ImgRedundantAlt { #[inline] fn is_redundant_alt_text(&self, alt_text: &str) -> bool { - self.redundant_words.is_match(alt_text) + for mat in self.redundant_words.find_iter(alt_text) { + // check if followed by space or is whole text + if mat.end() == alt_text.len() || 
alt_text.as_bytes()[mat.end()] == b' ' { + return true; + } + } + false } }