diff --git a/apps/oxlint/conformance/snapshot.md b/apps/oxlint/conformance/snapshot.md index d7a9a76989e6f..e320a4b211212 100644 --- a/apps/oxlint/conformance/snapshot.md +++ b/apps/oxlint/conformance/snapshot.md @@ -7,8 +7,8 @@ | Status | Count | % | | ----------------- | ----- | ------ | | Total rules | 292 | 100.0% | -| Fully passing | 289 | 99.0% | -| Partially passing | 3 | 1.0% | +| Fully passing | 291 | 99.7% | +| Partially passing | 1 | 0.3% | | Fully failing | 0 | 0.0% | | Load errors | 0 | 0.0% | | No tests run | 0 | 0.0% | @@ -18,8 +18,8 @@ | Status | Count | % | | ----------- | ----- | ------ | | Total tests | 33090 | 100.0% | -| Passing | 32803 | 99.1% | -| Failing | 5 | 0.0% | +| Passing | 32807 | 99.1% | +| Failing | 1 | 0.0% | | Skipped | 282 | 0.9% | ## Fully Passing Rules @@ -156,6 +156,7 @@ - `no-inner-declarations` (68 tests) - `no-invalid-regexp` (108 tests) - `no-invalid-this` (562 tests) (4 skipped) +- `no-irregular-whitespace` (280 tests) - `no-iterator` (9 tests) - `no-label-var` (5 tests) - `no-labels` (29 tests) @@ -306,6 +307,7 @@ - `symbol-description` (8 tests) - `template-curly-spacing` (57 tests) - `template-tag-spacing` (63 tests) +- `unicode-bom` (7 tests) - `use-isnan` (214 tests) - `valid-typeof` (54 tests) - `vars-on-top` (61 tests) @@ -317,8 +319,6 @@ ## Rules with Failures - `no-eval` - 100 / 101 (99.0%) -- `no-irregular-whitespace` - 279 / 280 (99.6%) -- `unicode-bom` - 4 / 7 (57.1%) ## Rules with Failures Detail @@ -359,146 +359,3 @@ AssertionError [ERR_ASSERTION]: Should have 1 error but had 0: [] at runInvalidTestCase (apps/oxlint/dist/index.js) at apps/oxlint/dist/index.js - -### `no-irregular-whitespace` - -Pass: 279 / 280 (99.6%) -Fail: 1 / 280 (0.4%) -Skip: 0 / 280 (0.0%) - -#### no-irregular-whitespace > valid - -```js -console.log('hello BOM'); -``` - -```json -{} -``` - -AssertionError [ERR_ASSERTION]: Should have no errors but had 1: [ - { - ruleId: 'rule-to-test/no-irregular-whitespace', - message: 'Irregular whitespace not allowed.', - messageId: 'noIrregularWhitespace', - severity: 1, - nodeType: null, - line: 1, - column: 0, - endLine: 1, - endColumn: 1, - suggestions: null - } -] - -1 !== 0 - - at assertErrorCountIsCorrect (apps/oxlint/dist/index.js) - at assertValidTestCasePasses (apps/oxlint/dist/index.js) - at runValidTestCase (apps/oxlint/dist/index.js) - at apps/oxlint/dist/index.js - - -### `unicode-bom` - -Pass: 4 / 7 (57.1%) -Fail: 3 / 7 (42.9%) -Skip: 0 / 7 (0.0%) - -#### unicode-bom > valid - -```js - var a = 123; -``` - -```json -{ - "options": [ - "always" - ] -} -``` - -AssertionError [ERR_ASSERTION]: Should have no errors but had 1: [ - { - ruleId: 'rule-to-test/unicode-bom', - message: 'Expected Unicode BOM (Byte Order Mark).', - messageId: 'expected', - severity: 1, - nodeType: null, - line: 1, - column: 0, - endLine: 1, - endColumn: 0, - suggestions: null - } -] - -1 !== 0 - - at assertErrorCountIsCorrect (apps/oxlint/dist/index.js) - at assertValidTestCasePasses (apps/oxlint/dist/index.js) - at runValidTestCase (apps/oxlint/dist/index.js) - at apps/oxlint/dist/index.js - - -#### unicode-bom > invalid - -```js - var a = 123; -``` - -```json -{ - "output": " var a = 123;", - "errors": [ - { - "messageId": "unexpected", - "line": 1, - "column": 1 - } - ] -} -``` - -AssertionError [ERR_ASSERTION]: Should have 1 error but had 0: [] - -0 !== 1 - - at assertErrorCountIsCorrect (apps/oxlint/dist/index.js) - at assertInvalidTestCasePasses (apps/oxlint/dist/index.js) - at runInvalidTestCase (apps/oxlint/dist/index.js) - at apps/oxlint/dist/index.js - - -#### unicode-bom > invalid - -```js - var a = 123; -``` - -```json -{ - "output": " var a = 123;", - "options": [ - "never" - ], - "errors": [ - { - "messageId": "unexpected", - "line": 1, - "column": 1 - } - ] -} -``` - -AssertionError [ERR_ASSERTION]: Should have 1 error but had 0: [] - -0 !== 1 - - at assertErrorCountIsCorrect (apps/oxlint/dist/index.js) - at assertInvalidTestCasePasses (apps/oxlint/dist/index.js) - at runInvalidTestCase (apps/oxlint/dist/index.js) - at apps/oxlint/dist/index.js - diff --git a/apps/oxlint/src-js/generated/constants.ts b/apps/oxlint/src-js/generated/constants.ts index 7bd97a8f28ead..51782c7b49ecf 100644 --- a/apps/oxlint/src-js/generated/constants.ts +++ b/apps/oxlint/src-js/generated/constants.ts @@ -6,6 +6,7 @@ export const BUFFER_ALIGN = 4294967296; export const DATA_POINTER_POS_32 = 536870902; export const IS_TS_FLAG_POS = 2147483612; export const IS_JSX_FLAG_POS = 2147483613; +export const HAS_BOM_FLAG_POS = 2147483614; export const PROGRAM_OFFSET = 0; export const SOURCE_START_OFFSET = 8; export const SOURCE_LEN_OFFSET = 16; diff --git a/apps/oxlint/src-js/plugins/lint.ts b/apps/oxlint/src-js/plugins/lint.ts index 5c7e3ee15ea19..5023a6d272951 100644 --- a/apps/oxlint/src-js/plugins/lint.ts +++ b/apps/oxlint/src-js/plugins/lint.ts @@ -5,6 +5,7 @@ import { allOptions, DEFAULT_OPTIONS_ID } from "./options.ts"; import { diagnostics } from "./report.ts"; import { setSettingsForFile, resetSettings } from "./settings.ts"; import { ast, initAst, resetSourceAndAst, setupSourceForFile } from "./source_code.ts"; +import { HAS_BOM_FLAG_POS } from "../generated/constants.ts"; import { typeAssertIs, debugAssert, debugAssertIsNonNull } from "../utils/asserts.ts"; import { getErrorMessage } from "../utils/utils.ts"; import { setGlobalsForFile, resetGlobals } from "./globals.ts"; @@ -154,7 +155,7 @@ export function lintFileImpl( // // But... source text and AST can be accessed in body of `create` method, or `before` hook, via `context.sourceCode`. // So we pass the buffer to source code module here, so it can decode source text / deserialize AST on demand. - const hasBOM = false; // TODO: Set this correctly + const hasBOM = buffer[HAS_BOM_FLAG_POS] === 1; const parserServices = PARSER_SERVICES_DEFAULT; // TODO: Set this correctly setupSourceForFile(buffer, hasBOM, parserServices); diff --git a/apps/oxlint/src/js_plugins/parse.rs b/apps/oxlint/src/js_plugins/parse.rs index d7b2ef3125389..6976639e47917 100644 --- a/apps/oxlint/src/js_plugins/parse.rs +++ b/apps/oxlint/src/js_plugins/parse.rs @@ -151,7 +151,7 @@ unsafe fn parse_raw_impl( // Parse source. // Enclose parsing logic in a scope to make 100% sure no references to within `Allocator` exist after this. - let program_offset = { + let (program_offset, has_bom) = { // SAFETY: We checked above that `source_len` does not exceed length of buffer let source_text = unsafe { buffer.get_unchecked(..source_len) }; // SAFETY: Caller guarantees source occupies this region of the buffer and is valid UTF-8 @@ -179,22 +179,46 @@ unsafe fn parse_raw_impl( if parsing_failed { // Use sentinel value for program offset to indicate that parsing failed - PARSE_FAIL_SENTINEL + (PARSE_FAIL_SENTINEL, false) } else { - // Convert spans to UTF-16 - let span_converter = Utf8ToUtf16::new(source_text); + // If has BOM, remove it + const BOM: &str = "\u{feff}"; + const BOM_LEN: usize = BOM.len(); + + let mut source_text = program.source_text; + let has_bom = source_text.starts_with(BOM); + if has_bom { + source_text = &source_text[BOM_LEN..]; + program.source_text = source_text; + } + + // Convert spans to UTF-16. + // If source starts with BOM, create converter which ignores the BOM. + let span_converter = if has_bom { + #[expect(clippy::cast_possible_truncation)] + Utf8ToUtf16::new_with_offset(source_text, BOM_LEN as u32) + } else { + Utf8ToUtf16::new(source_text) + }; + span_converter.convert_program(program); span_converter.convert_comments(&mut program.comments); // Return offset of `Program` within buffer (bottom 32 bits of pointer) - ptr::from_ref(program) as u32 + let program_offset = ptr::from_ref(program) as u32; + + (program_offset, has_bom) } }; // Write metadata into end of buffer #[allow(clippy::cast_possible_truncation)] - let metadata = - RawTransferMetadata::new(program_offset, source_type.is_typescript(), source_type.is_jsx()); + let metadata = RawTransferMetadata::new( + program_offset, + source_type.is_typescript(), + source_type.is_jsx(), + has_bom, + ); const RAW_METADATA_OFFSET: usize = BUFFER_SIZE - RAW_METADATA_SIZE; const _: () = assert!(RAW_METADATA_OFFSET.is_multiple_of(BUMP_ALIGN)); // SAFETY: `RAW_METADATA_OFFSET` is less than length of `buffer`. diff --git a/apps/oxlint/test/fixtures/bom/.oxlintrc.json b/apps/oxlint/test/fixtures/bom/.oxlintrc.json new file mode 100644 index 0000000000000..3c5f25df0e572 --- /dev/null +++ b/apps/oxlint/test/fixtures/bom/.oxlintrc.json @@ -0,0 +1,7 @@ +{ + "categories": { "correctness": "off" }, + "jsPlugins": ["./plugin.ts"], + "rules": { + "bom-plugin/bom": "error" + } +} diff --git a/apps/oxlint/test/fixtures/bom/files/bom.js b/apps/oxlint/test/fixtures/bom/files/bom.js new file mode 100644 index 0000000000000..7243938e0af3b --- /dev/null +++ b/apps/oxlint/test/fixtures/bom/files/bom.js @@ -0,0 +1,3 @@ +debugger; +debugger; +debugger; \ No newline at end of file diff --git a/apps/oxlint/test/fixtures/bom/files/bom_unicode.js b/apps/oxlint/test/fixtures/bom/files/bom_unicode.js new file mode 100644 index 0000000000000..f3f07a4100841 --- /dev/null +++ b/apps/oxlint/test/fixtures/bom/files/bom_unicode.js @@ -0,0 +1,4 @@ +debugger; +// 😀🤪😆😎🤮 +debugger; +debugger; \ No newline at end of file diff --git a/apps/oxlint/test/fixtures/bom/files/no_bom.js b/apps/oxlint/test/fixtures/bom/files/no_bom.js new file mode 100644 index 0000000000000..6facc5ab449b2 --- /dev/null +++ b/apps/oxlint/test/fixtures/bom/files/no_bom.js @@ -0,0 +1,3 @@ +debugger; +debugger; +debugger; \ No newline at end of file diff --git a/apps/oxlint/test/fixtures/bom/files/no_bom_unicode.js b/apps/oxlint/test/fixtures/bom/files/no_bom_unicode.js new file mode 100644 index 0000000000000..57d9fcb325f86 --- /dev/null +++ b/apps/oxlint/test/fixtures/bom/files/no_bom_unicode.js @@ -0,0 +1,4 @@ +debugger; +// 😀🤪😆😎🤮 +debugger; +debugger; \ No newline at end of file diff --git a/apps/oxlint/test/fixtures/bom/output.snap.md b/apps/oxlint/test/fixtures/bom/output.snap.md new file mode 100644 index 0000000000000..ed9200a04077e --- /dev/null +++ b/apps/oxlint/test/fixtures/bom/output.snap.md @@ -0,0 +1,144 @@ +# Exit code +1 + +# stdout +``` + x bom-plugin(bom): Debugger statement at 0-9 + ,-[files/bom.js:1:4] + 1 | debugger; + : ^^^^^^^^^ + 2 | debugger; + `---- + + x bom-plugin(bom): + | hasBOM: true + | sourceText: "debugger;\ndebugger;\ndebugger;" + | Program span: 0-29 + ,-[files/bom.js:1:4] + 1 | ,-> debugger; + 2 | | debugger; + 3 | `-> debugger; + `---- + + x bom-plugin(bom): Debugger statement at 10-19 + ,-[files/bom.js:2:1] + 1 | debugger; + 2 | debugger; + : ^^^^^^^^^ + 3 | debugger; + `---- + + x bom-plugin(bom): Debugger statement at 20-29 + ,-[files/bom.js:3:1] + 2 | debugger; + 3 | debugger; + : ^^^^^^^^^ + `---- + + x bom-plugin(bom): Debugger statement at 0-9 + ,-[files/bom_unicode.js:1:4] + 1 | debugger; + : ^^^^^^^^^ + 2 | // 😀🤪😆😎🤮 + `---- + + x bom-plugin(bom): + | hasBOM: true + | sourceText: "debugger;\n// 😀🤪😆😎🤮\ndebugger;\ndebugger;" + | Program span: 0-43 + ,-[files/bom_unicode.js:1:4] + 1 | ,-> debugger; + 2 | | // 😀🤪😆😎🤮 + 3 | | debugger; + 4 | `-> debugger; + `---- + + x bom-plugin(bom): Debugger statement at 24-33 + ,-[files/bom_unicode.js:3:1] + 2 | // 😀🤪😆😎🤮 + 3 | debugger; + : ^^^^^^^^^ + 4 | debugger; + `---- + + x bom-plugin(bom): Debugger statement at 34-43 + ,-[files/bom_unicode.js:4:1] + 3 | debugger; + 4 | debugger; + : ^^^^^^^^^ + `---- + + x bom-plugin(bom): Debugger statement at 0-9 + ,-[files/no_bom.js:1:1] + 1 | debugger; + : ^^^^^^^^^ + 2 | debugger; + `---- + + x bom-plugin(bom): + | hasBOM: false + | sourceText: "debugger;\ndebugger;\ndebugger;" + | Program span: 0-29 + ,-[files/no_bom.js:1:1] + 1 | ,-> debugger; + 2 | | debugger; + 3 | `-> debugger; + `---- + + x bom-plugin(bom): Debugger statement at 10-19 + ,-[files/no_bom.js:2:1] + 1 | debugger; + 2 | debugger; + : ^^^^^^^^^ + 3 | debugger; + `---- + + x bom-plugin(bom): Debugger statement at 20-29 + ,-[files/no_bom.js:3:1] + 2 | debugger; + 3 | debugger; + : ^^^^^^^^^ + `---- + + x bom-plugin(bom): Debugger statement at 0-9 + ,-[files/no_bom_unicode.js:1:1] + 1 | debugger; + : ^^^^^^^^^ + 2 | // 😀🤪😆😎🤮 + `---- + + x bom-plugin(bom): + | hasBOM: false + | sourceText: "debugger;\n// 😀🤪😆😎🤮\ndebugger;\ndebugger;" + | Program span: 0-43 + ,-[files/no_bom_unicode.js:1:1] + 1 | ,-> debugger; + 2 | | // 😀🤪😆😎🤮 + 3 | | debugger; + 4 | `-> debugger; + `---- + + x bom-plugin(bom): Debugger statement at 24-33 + ,-[files/no_bom_unicode.js:3:1] + 2 | // 😀🤪😆😎🤮 + 3 | debugger; + : ^^^^^^^^^ + 4 | debugger; + `---- + + x bom-plugin(bom): Debugger statement at 34-43 + ,-[files/no_bom_unicode.js:4:1] + 3 | debugger; + 4 | debugger; + : ^^^^^^^^^ + `---- + +Found 0 warnings and 16 errors. +Finished in Xms on 4 files with 1 rules using X threads. +``` + +# stderr +``` +WARNING: JS plugins are experimental and not subject to semver. +Breaking changes are possible while JS plugins support is under development. +``` diff --git a/apps/oxlint/test/fixtures/bom/plugin.ts b/apps/oxlint/test/fixtures/bom/plugin.ts new file mode 100644 index 0000000000000..aa83caec04f00 --- /dev/null +++ b/apps/oxlint/test/fixtures/bom/plugin.ts @@ -0,0 +1,33 @@ +import type { Plugin } from "#oxlint"; + +const plugin: Plugin = { + meta: { + name: "bom-plugin", + }, + rules: { + bom: { + create(context) { + return { + Program(node) { + context.report({ + message: + "\n" + + `hasBOM: ${context.sourceCode.hasBOM}\n` + + `sourceText: ${JSON.stringify(context.sourceCode.text)}\n` + + `Program span: ${node.start}-${node.end}`, + node, + }); + }, + DebuggerStatement(node) { + context.report({ + message: `Debugger statement at ${node.start}-${node.end}`, + node, + }); + }, + }; + }, + }, + }, +}; + +export default plugin; diff --git a/crates/oxc_ast_macros/src/generated/structs.rs b/crates/oxc_ast_macros/src/generated/structs.rs index 0fa49a5704e21..590c6b7a15c0c 100644 --- a/crates/oxc_ast_macros/src/generated/structs.rs +++ b/crates/oxc_ast_macros/src/generated/structs.rs @@ -96,7 +96,7 @@ pub static STRUCTS: phf::Map<&'static str, StructDetails> = ::phf::Map { ("TSInterfaceBody", StructDetails { field_order: None }), ("CatchParameter", StructDetails { field_order: None }), ("RawTransferData", StructDetails { field_order: None }), - ("RawTransferMetadata", StructDetails { field_order: Some(&[1, 2, 3, 0]) }), + ("RawTransferMetadata", StructDetails { field_order: Some(&[1, 2, 3, 4, 0]) }), ("ObjectProperty", StructDetails { field_order: Some(&[0, 3, 1, 2, 4, 5, 6]) }), ("DebuggerStatement", StructDetails { field_order: None }), ("TSModuleDeclaration", StructDetails { field_order: Some(&[0, 1, 2, 4, 5, 3]) }), @@ -136,7 +136,7 @@ pub static STRUCTS: phf::Map<&'static str, StructDetails> = ::phf::Map { ("TSTupleType", StructDetails { field_order: None }), ("TSTypeParameter", StructDetails { field_order: None }), ("ErrorLabel", StructDetails { field_order: Some(&[1, 0]) }), - ("RawTransferMetadata2", StructDetails { field_order: Some(&[1, 2, 3, 0]) }), + ("RawTransferMetadata2", StructDetails { field_order: Some(&[1, 2, 3, 4, 0]) }), ("ChainExpression", StructDetails { field_order: None }), ("BindingProperty", StructDetails { field_order: None }), ("NullLiteral", StructDetails { field_order: None }), diff --git a/crates/oxc_ast_visit/src/utf8_to_utf16/mod.rs b/crates/oxc_ast_visit/src/utf8_to_utf16/mod.rs index 406254520df4b..ee02468fa68af 100644 --- a/crates/oxc_ast_visit/src/utf8_to_utf16/mod.rs +++ b/crates/oxc_ast_visit/src/utf8_to_utf16/mod.rs @@ -22,7 +22,7 @@ impl Utf8ToUtf16 { translations.push(Translation { utf8_offset: 0, utf16_difference: 0 }); - build_translations(source_text, &mut translations); + build_translations(source_text, &mut translations, 0); // If no translations have been added after the first `0, 0` dummy, then source is entirely ASCII. // Remove the dummy entry. @@ -40,6 +40,24 @@ impl Utf8ToUtf16 { Self { translations } } + /// Create new [`Utf8ToUtf16`] conversion table from source text with an offset. + /// + /// `offset` is the number of bytes to subtract from UTF-8 offsets before converting to UTF-16. + /// These bytes should not be part of `source_text` string. + /// + /// If file starts with a BOM and UTF-16 offsets should be for the source text without the BOM, + /// pass `source_text` with the BOM trimmed from the start, and `offset` as 3 (length of BOM in UTF-8 bytes). + pub fn new_with_offset(source_text: &str, offset: u32) -> Self { + let mut translations = Vec::with_capacity(16); + + translations.push(Translation { utf8_offset: 0, utf16_difference: 0 }); + translations.push(Translation { utf8_offset: offset, utf16_difference: offset }); + + build_translations(source_text, &mut translations, offset); + + Self { translations } + } + /// Create a [`Utf8ToUtf16Converter`] converter, to convert offsets from UTF-8 to UTF-16. /// /// The converter is optimized for converting a sequence of offsets in ascending order. diff --git a/crates/oxc_ast_visit/src/utf8_to_utf16/translation.rs b/crates/oxc_ast_visit/src/utf8_to_utf16/translation.rs index 483e9b26cf2df..1dccbb60a28df 100644 --- a/crates/oxc_ast_visit/src/utf8_to_utf16/translation.rs +++ b/crates/oxc_ast_visit/src/utf8_to_utf16/translation.rs @@ -52,6 +52,8 @@ impl AlignedChunk { /// Build table of translations from UTF-8 offsets to UTF-16 offsets. /// +/// `offset` is the starting offset. Usually 0, unless trimming BOM from start of file. +/// /// Process bulk of source text in chunks of 32 bytes, using SIMD instructions. /// This should be much faster than byte-by-byte processing, assuming non-ASCII chars are rare in source code. /// @@ -75,9 +77,9 @@ impl AlignedChunk { /// UTF-16 len = UTF-8 len - 2 /// /// So UTF-16 offset = UTF-8 offset - count of bytes `>= 0xC0` - count of bytes `>= 0xE0` -pub fn build_translations(source_text: &str, translations: &mut Vec) { +pub fn build_translations(source_text: &str, translations: &mut Vec, offset: u32) { // Running counter of difference between UTF-8 and UTF-16 offset - let mut utf16_difference = 0; + let mut utf16_difference = offset; // Closure that processes a slice of bytes let mut process_slice = |slice: &[u8], start_offset: usize| { diff --git a/crates/oxc_linter/src/generated/assert_layouts.rs b/crates/oxc_linter/src/generated/assert_layouts.rs index 15b83f8606282..c71a60c839c65 100644 --- a/crates/oxc_linter/src/generated/assert_layouts.rs +++ b/crates/oxc_linter/src/generated/assert_layouts.rs @@ -9,23 +9,25 @@ use crate::*; #[cfg(target_pointer_width = "64")] const _: () = { - // Padding: 2 bytes + // Padding: 1 bytes assert!(size_of::() == 16); assert!(align_of::() == 8); assert!(offset_of!(RawTransferMetadata2, data_offset) == 8); assert!(offset_of!(RawTransferMetadata2, is_ts) == 12); assert!(offset_of!(RawTransferMetadata2, is_jsx) == 13); + assert!(offset_of!(RawTransferMetadata2, has_bom) == 14); assert!(offset_of!(RawTransferMetadata2, _padding) == 0); }; #[cfg(target_pointer_width = "32")] const _: () = if cfg!(target_family = "wasm") || align_of::() == 8 { - // Padding: 2 bytes + // Padding: 1 bytes assert!(size_of::() == 16); assert!(align_of::() == 8); assert!(offset_of!(RawTransferMetadata2, data_offset) == 8); assert!(offset_of!(RawTransferMetadata2, is_ts) == 12); assert!(offset_of!(RawTransferMetadata2, is_jsx) == 13); + assert!(offset_of!(RawTransferMetadata2, has_bom) == 14); assert!(offset_of!(RawTransferMetadata2, _padding) == 0); }; diff --git a/crates/oxc_linter/src/lib.rs b/crates/oxc_linter/src/lib.rs index 6c81d04d1de7e..9b3b790ac1b14 100644 --- a/crates/oxc_linter/src/lib.rs +++ b/crates/oxc_linter/src/lib.rs @@ -545,10 +545,26 @@ impl Linter { program: &mut Program<'_>, allocator: &Allocator, ) { - let source_text = program.source_text; + // If has BOM, remove it + const BOM: &str = "\u{feff}"; + const BOM_LEN: usize = BOM.len(); + + let mut source_text = program.source_text; + let has_bom = source_text.starts_with(BOM); + if has_bom { + source_text = &source_text[BOM_LEN..]; + program.source_text = source_text; + } + + // Convert spans to UTF-16. + // If source starts with BOM, create converter which ignores the BOM. + let span_converter = if has_bom { + #[expect(clippy::cast_possible_truncation)] + Utf8ToUtf16::new_with_offset(source_text, BOM_LEN as u32) + } else { + Utf8ToUtf16::new(source_text) + }; - // Convert spans to UTF-16 - let span_converter = Utf8ToUtf16::new(source_text); span_converter.convert_program(program); span_converter.convert_comments(&mut program.comments); @@ -558,7 +574,7 @@ impl Linter { // Write offset of `Program` in metadata at end of buffer let is_ts = program.source_type.is_typescript(); let is_jsx = program.source_type.is_jsx(); - let metadata = RawTransferMetadata::new(program_offset, is_ts, is_jsx); + let metadata = RawTransferMetadata::new(program_offset, is_ts, is_jsx, has_bom); let metadata_ptr = allocator.end_ptr().cast::(); // SAFETY: `Allocator` was created by `FixedSizeAllocator` which reserved space after `end_ptr` // for a `RawTransferMetadata`. `end_ptr` is aligned for `RawTransferMetadata`. @@ -693,6 +709,8 @@ pub struct RawTransferMetadata2 { pub is_ts: bool, /// `true` if AST is JSX. pub is_jsx: bool, + /// `true` if source text has a BOM. + pub has_bom: bool, /// Padding to pad struct to size 16. pub(crate) _padding: u64, } @@ -700,7 +718,7 @@ pub struct RawTransferMetadata2 { use RawTransferMetadata2 as RawTransferMetadata; impl RawTransferMetadata { - pub fn new(data_offset: u32, is_ts: bool, is_jsx: bool) -> Self { - Self { data_offset, is_ts, is_jsx, _padding: 0 } + pub fn new(data_offset: u32, is_ts: bool, is_jsx: bool, has_bom: bool) -> Self { + Self { data_offset, is_ts, is_jsx, has_bom, _padding: 0 } } } diff --git a/napi/parser/src-js/generated/constants.js b/napi/parser/src-js/generated/constants.js index 7bd97a8f28ead..51782c7b49ecf 100644 --- a/napi/parser/src-js/generated/constants.js +++ b/napi/parser/src-js/generated/constants.js @@ -6,6 +6,7 @@ export const BUFFER_ALIGN = 4294967296; export const DATA_POINTER_POS_32 = 536870902; export const IS_TS_FLAG_POS = 2147483612; export const IS_JSX_FLAG_POS = 2147483613; +export const HAS_BOM_FLAG_POS = 2147483614; export const PROGRAM_OFFSET = 0; export const SOURCE_START_OFFSET = 8; export const SOURCE_LEN_OFFSET = 16; diff --git a/napi/parser/src/generated/assert_layouts.rs b/napi/parser/src/generated/assert_layouts.rs index f8631288b83f5..04c00c633584d 100644 --- a/napi/parser/src/generated/assert_layouts.rs +++ b/napi/parser/src/generated/assert_layouts.rs @@ -17,12 +17,13 @@ const _: () = { assert!(offset_of!(RawTransferData, module) == 152); assert!(offset_of!(RawTransferData, errors) == 256); - // Padding: 2 bytes + // Padding: 1 bytes assert!(size_of::() == 16); assert!(align_of::() == 8); assert!(offset_of!(RawTransferMetadata, data_offset) == 8); assert!(offset_of!(RawTransferMetadata, is_ts) == 12); assert!(offset_of!(RawTransferMetadata, is_jsx) == 13); + assert!(offset_of!(RawTransferMetadata, has_bom) == 14); assert!(offset_of!(RawTransferMetadata, _padding) == 0); // Padding: 7 bytes @@ -76,12 +77,13 @@ const _: () = if cfg!(target_family = "wasm") || align_of::() == 8 { assert!(offset_of!(RawTransferData, module) == 104); assert!(offset_of!(RawTransferData, errors) == 172); - // Padding: 2 bytes + // Padding: 1 bytes assert!(size_of::() == 16); assert!(align_of::() == 8); assert!(offset_of!(RawTransferMetadata, data_offset) == 8); assert!(offset_of!(RawTransferMetadata, is_ts) == 12); assert!(offset_of!(RawTransferMetadata, is_jsx) == 13); + assert!(offset_of!(RawTransferMetadata, has_bom) == 14); assert!(offset_of!(RawTransferMetadata, _padding) == 0); // Padding: 3 bytes diff --git a/napi/parser/src/raw_transfer_types.rs b/napi/parser/src/raw_transfer_types.rs index f0792ea6be493..5f9e79f580be7 100644 --- a/napi/parser/src/raw_transfer_types.rs +++ b/napi/parser/src/raw_transfer_types.rs @@ -38,13 +38,15 @@ pub struct RawTransferMetadata { pub is_ts: bool, /// This field always contains `false` in parser. It's only used in linter. pub is_jsx: bool, + /// This field always contains `false` in parser. It's only used in linter. + pub has_bom: bool, /// Padding to pad struct to size 16. pub(crate) _padding: u64, } impl RawTransferMetadata { pub fn new(data_offset: u32, is_ts: bool) -> Self { - Self { data_offset, is_ts, is_jsx: false, _padding: 0 } + Self { data_offset, is_ts, is_jsx: false, has_bom: false, _padding: 0 } } } diff --git a/oxfmtrc.jsonc b/oxfmtrc.jsonc index b4ccd8c438693..4215c6c9960e4 100644 --- a/oxfmtrc.jsonc +++ b/oxfmtrc.jsonc @@ -7,6 +7,7 @@ // Ignore `fixtures` directories except for `apps/oxlint/test/fixtures` "**/fixtures/**", "!apps/oxlint/test/fixtures/**", + "apps/oxlint/test/fixtures/bom/files/**", "**/dist/**", "**/generated/**", "**/CHANGELOG.md", diff --git a/tasks/ast_tools/src/generators/raw_transfer.rs b/tasks/ast_tools/src/generators/raw_transfer.rs index 74c663675964f..fda78b598961b 100644 --- a/tasks/ast_tools/src/generators/raw_transfer.rs +++ b/tasks/ast_tools/src/generators/raw_transfer.rs @@ -1298,6 +1298,8 @@ struct Constants { is_ts_pos: u32, /// Offset within buffer of `bool` indicating if AST is JSX is_jsx_pos: u32, + /// Offset within buffer of `bool` indicating if source text has BOM + has_bom_pos: u32, /// Offset of `Program` in buffer, relative to position of `RawTransferData` program_offset: u32, /// Offset of `u32` source text start pos, relative to position of `Program` @@ -1315,6 +1317,7 @@ fn generate_constants(consts: Constants) -> (String, TokenStream) { data_pointer_pos, is_ts_pos, is_jsx_pos, + has_bom_pos, program_offset, source_start_offset, source_len_offset, @@ -1330,6 +1333,7 @@ fn generate_constants(consts: Constants) -> (String, TokenStream) { export const DATA_POINTER_POS_32 = {data_pointer_pos_32}; export const IS_TS_FLAG_POS = {is_ts_pos}; export const IS_JSX_FLAG_POS = {is_jsx_pos}; + export const HAS_BOM_FLAG_POS = {has_bom_pos}; export const PROGRAM_OFFSET = {program_offset}; export const SOURCE_START_OFFSET = {source_start_offset}; export const SOURCE_LEN_OFFSET = {source_len_offset}; @@ -1365,6 +1369,7 @@ fn get_constants(schema: &Schema) -> Constants { let mut data_offset_field = None; let mut is_ts_field = None; let mut is_jsx_field = None; + let mut has_bom_field = None; for (field1, field2) in raw_metadata_struct.fields.iter().zip(&raw_metadata2_struct.fields) { assert_eq!(field1.name(), field2.name()); assert_eq!(field1.type_id, field2.type_id); @@ -1373,12 +1378,14 @@ fn get_constants(schema: &Schema) -> Constants { "data_offset" => data_offset_field = Some(field1), "is_ts" => is_ts_field = Some(field1), "is_jsx" => is_jsx_field = Some(field1), + "has_bom" => has_bom_field = Some(field1), _ => {} } } let data_offset_field = data_offset_field.unwrap(); let is_ts_field = is_ts_field.unwrap(); let is_jsx_field = is_jsx_field.unwrap(); + let has_bom_field = has_bom_field.unwrap(); let raw_metadata_size = raw_metadata_struct.layout_64().size; @@ -1395,6 +1402,7 @@ fn get_constants(schema: &Schema) -> Constants { let data_pointer_pos = raw_metadata_pos + data_offset_field.offset_64(); let is_ts_pos = raw_metadata_pos + is_ts_field.offset_64(); let is_jsx_pos = raw_metadata_pos + is_jsx_field.offset_64(); + let has_bom_pos = raw_metadata_pos + has_bom_field.offset_64(); let program_offset = schema .type_by_name("RawTransferData") @@ -1417,6 +1425,7 @@ fn get_constants(schema: &Schema) -> Constants { data_pointer_pos, is_ts_pos, is_jsx_pos, + has_bom_pos, program_offset, source_start_offset, source_len_offset,