diff --git a/apps/oxlint/src-js/generated/deserialize.js b/apps/oxlint/src-js/generated/deserialize.js index 85d4d03f1b01a..2cdbdb74e417b 100644 --- a/apps/oxlint/src-js/generated/deserialize.js +++ b/apps/oxlint/src-js/generated/deserialize.js @@ -10,6 +10,7 @@ let uint8, sourceText, sourceIsAscii, sourceStartPos, + firstNonAsciiPos, parent = null, getLoc; @@ -42,6 +43,14 @@ function deserializeWith(buffer, sourceTextInput, sourceByteLen, getLocInput, de float64 = buffer.float64; sourceText = sourceTextInput; sourceIsAscii = sourceText.length === sourceByteLen; + if (!sourceIsAscii) { + firstNonAsciiPos = sourceByteLen; + for (let i = sourceStartPos, e = sourceStartPos + sourceByteLen; i < e; i++) + if (uint8[i] >= 128) { + firstNonAsciiPos = i - sourceStartPos; + break; + } + } getLoc = getLocInput; return deserialize(uint32[536870900]); } @@ -5857,11 +5866,12 @@ function deserializeStr(pos) { len = uint32[pos32 + 2]; if (len === 0) return ""; pos = uint32[pos32]; - if (sourceIsAscii && pos >= sourceStartPos) return sourceText.substr(pos - sourceStartPos, len); - // Longer strings use `TextDecoder` - // TODO: Find best switch-over point + if (pos >= sourceStartPos && (sourceIsAscii || pos - sourceStartPos + len <= firstNonAsciiPos)) + return sourceText.substr(pos - sourceStartPos, len); + // Use `TextDecoder` for strings longer than 9 bytes. + // For shorter strings, the byte-by-byte loop below avoids native call overhead. let end = pos + len; - if (len > 50) return decodeStr(uint8.subarray(pos, end)); + if (len > 9) return decodeStr(uint8.subarray(pos, end)); // Shorter strings decode by hand to avoid native call let out = "", c; diff --git a/napi/parser/src-js/generated/deserialize/js.js b/napi/parser/src-js/generated/deserialize/js.js index 04dad28fa2608..4a68737c8baef 100644 --- a/napi/parser/src-js/generated/deserialize/js.js +++ b/napi/parser/src-js/generated/deserialize/js.js @@ -1,7 +1,7 @@ // Auto-generated code, DO NOT EDIT DIRECTLY! // To edit this generated file you have to edit `tasks/ast_tools/src/generators/raw_transfer.rs`. -let uint8, uint32, float64, sourceText, sourceIsAscii, sourceEndPos; +let uint8, uint32, float64, sourceText, sourceIsAscii, sourceEndPos, firstNonAsciiPos; const textDecoder = new TextDecoder("utf-8", { ignoreBOM: true }), decodeStr = textDecoder.decode.bind(textDecoder), @@ -20,6 +20,14 @@ function deserializeWith(buffer, sourceTextInput, sourceByteLen, getLocInput, de float64 = buffer.float64; sourceText = sourceTextInput; sourceIsAscii = sourceText.length === sourceByteLen; + if (!sourceIsAscii) { + firstNonAsciiPos = sourceByteLen; + for (let i = 0; i < sourceByteLen; i++) + if (uint8[i] >= 128) { + firstNonAsciiPos = i; + break; + } + } return deserialize(uint32[536870900]); } @@ -4513,11 +4521,12 @@ function deserializeStr(pos) { len = uint32[pos32 + 2]; if (len === 0) return ""; pos = uint32[pos32]; - if (sourceIsAscii && pos < sourceEndPos) return sourceText.substr(pos, len); - // Longer strings use `TextDecoder` - // TODO: Find best switch-over point + if (pos < sourceEndPos && (sourceIsAscii || pos + len <= firstNonAsciiPos)) + return sourceText.substr(pos, len); + // Use `TextDecoder` for strings longer than 9 bytes. + // For shorter strings, the byte-by-byte loop below avoids native call overhead. let end = pos + len; - if (len > 50) return decodeStr(uint8.subarray(pos, end)); + if (len > 9) return decodeStr(uint8.subarray(pos, end)); // Shorter strings decode by hand to avoid native call let out = "", c; diff --git a/napi/parser/src-js/generated/deserialize/js_parent.js b/napi/parser/src-js/generated/deserialize/js_parent.js index ddd97fed54b06..6ef044bdf1d5f 100644 --- a/napi/parser/src-js/generated/deserialize/js_parent.js +++ b/napi/parser/src-js/generated/deserialize/js_parent.js @@ -7,6 +7,7 @@ let uint8, sourceText, sourceIsAscii, sourceEndPos, + firstNonAsciiPos, parent = null; const textDecoder = new TextDecoder("utf-8", { ignoreBOM: true }), @@ -26,6 +27,14 @@ function deserializeWith(buffer, sourceTextInput, sourceByteLen, getLocInput, de float64 = buffer.float64; sourceText = sourceTextInput; sourceIsAscii = sourceText.length === sourceByteLen; + if (!sourceIsAscii) { + firstNonAsciiPos = sourceByteLen; + for (let i = 0; i < sourceByteLen; i++) + if (uint8[i] >= 128) { + firstNonAsciiPos = i; + break; + } + } return deserialize(uint32[536870900]); } @@ -5049,11 +5058,12 @@ function deserializeStr(pos) { len = uint32[pos32 + 2]; if (len === 0) return ""; pos = uint32[pos32]; - if (sourceIsAscii && pos < sourceEndPos) return sourceText.substr(pos, len); - // Longer strings use `TextDecoder` - // TODO: Find best switch-over point + if (pos < sourceEndPos && (sourceIsAscii || pos + len <= firstNonAsciiPos)) + return sourceText.substr(pos, len); + // Use `TextDecoder` for strings longer than 9 bytes. + // For shorter strings, the byte-by-byte loop below avoids native call overhead. let end = pos + len; - if (len > 50) return decodeStr(uint8.subarray(pos, end)); + if (len > 9) return decodeStr(uint8.subarray(pos, end)); // Shorter strings decode by hand to avoid native call let out = "", c; diff --git a/napi/parser/src-js/generated/deserialize/js_range.js b/napi/parser/src-js/generated/deserialize/js_range.js index 39afc4798d7fe..775d908455ae5 100644 --- a/napi/parser/src-js/generated/deserialize/js_range.js +++ b/napi/parser/src-js/generated/deserialize/js_range.js @@ -1,7 +1,7 @@ // Auto-generated code, DO NOT EDIT DIRECTLY! // To edit this generated file you have to edit `tasks/ast_tools/src/generators/raw_transfer.rs`. -let uint8, uint32, float64, sourceText, sourceIsAscii, sourceEndPos; +let uint8, uint32, float64, sourceText, sourceIsAscii, sourceEndPos, firstNonAsciiPos; const textDecoder = new TextDecoder("utf-8", { ignoreBOM: true }), decodeStr = textDecoder.decode.bind(textDecoder), @@ -20,6 +20,14 @@ function deserializeWith(buffer, sourceTextInput, sourceByteLen, getLocInput, de float64 = buffer.float64; sourceText = sourceTextInput; sourceIsAscii = sourceText.length === sourceByteLen; + if (!sourceIsAscii) { + firstNonAsciiPos = sourceByteLen; + for (let i = 0; i < sourceByteLen; i++) + if (uint8[i] >= 128) { + firstNonAsciiPos = i; + break; + } + } return deserialize(uint32[536870900]); } @@ -5063,11 +5071,12 @@ function deserializeStr(pos) { len = uint32[pos32 + 2]; if (len === 0) return ""; pos = uint32[pos32]; - if (sourceIsAscii && pos < sourceEndPos) return sourceText.substr(pos, len); - // Longer strings use `TextDecoder` - // TODO: Find best switch-over point + if (pos < sourceEndPos && (sourceIsAscii || pos + len <= firstNonAsciiPos)) + return sourceText.substr(pos, len); + // Use `TextDecoder` for strings longer than 9 bytes. + // For shorter strings, the byte-by-byte loop below avoids native call overhead. let end = pos + len; - if (len > 50) return decodeStr(uint8.subarray(pos, end)); + if (len > 9) return decodeStr(uint8.subarray(pos, end)); // Shorter strings decode by hand to avoid native call let out = "", c; diff --git a/napi/parser/src-js/generated/deserialize/js_range_parent.js b/napi/parser/src-js/generated/deserialize/js_range_parent.js index dad53373d4ecd..40bae9adeb148 100644 --- a/napi/parser/src-js/generated/deserialize/js_range_parent.js +++ b/napi/parser/src-js/generated/deserialize/js_range_parent.js @@ -7,6 +7,7 @@ let uint8, sourceText, sourceIsAscii, sourceEndPos, + firstNonAsciiPos, parent = null; const textDecoder = new TextDecoder("utf-8", { ignoreBOM: true }), @@ -26,6 +27,14 @@ function deserializeWith(buffer, sourceTextInput, sourceByteLen, getLocInput, de float64 = buffer.float64; sourceText = sourceTextInput; sourceIsAscii = sourceText.length === sourceByteLen; + if (!sourceIsAscii) { + firstNonAsciiPos = sourceByteLen; + for (let i = 0; i < sourceByteLen; i++) + if (uint8[i] >= 128) { + firstNonAsciiPos = i; + break; + } + } return deserialize(uint32[536870900]); } @@ -5602,11 +5611,12 @@ function deserializeStr(pos) { len = uint32[pos32 + 2]; if (len === 0) return ""; pos = uint32[pos32]; - if (sourceIsAscii && pos < sourceEndPos) return sourceText.substr(pos, len); - // Longer strings use `TextDecoder` - // TODO: Find best switch-over point + if (pos < sourceEndPos && (sourceIsAscii || pos + len <= firstNonAsciiPos)) + return sourceText.substr(pos, len); + // Use `TextDecoder` for strings longer than 9 bytes. + // For shorter strings, the byte-by-byte loop below avoids native call overhead. let end = pos + len; - if (len > 50) return decodeStr(uint8.subarray(pos, end)); + if (len > 9) return decodeStr(uint8.subarray(pos, end)); // Shorter strings decode by hand to avoid native call let out = "", c; diff --git a/napi/parser/src-js/generated/deserialize/ts.js b/napi/parser/src-js/generated/deserialize/ts.js index 38f73fc57291b..f763b19b58931 100644 --- a/napi/parser/src-js/generated/deserialize/ts.js +++ b/napi/parser/src-js/generated/deserialize/ts.js @@ -1,7 +1,7 @@ // Auto-generated code, DO NOT EDIT DIRECTLY! // To edit this generated file you have to edit `tasks/ast_tools/src/generators/raw_transfer.rs`. -let uint8, uint32, float64, sourceText, sourceIsAscii, sourceEndPos; +let uint8, uint32, float64, sourceText, sourceIsAscii, sourceEndPos, firstNonAsciiPos; const textDecoder = new TextDecoder("utf-8", { ignoreBOM: true }), decodeStr = textDecoder.decode.bind(textDecoder), @@ -20,6 +20,14 @@ function deserializeWith(buffer, sourceTextInput, sourceByteLen, getLocInput, de float64 = buffer.float64; sourceText = sourceTextInput; sourceIsAscii = sourceText.length === sourceByteLen; + if (!sourceIsAscii) { + firstNonAsciiPos = sourceByteLen; + for (let i = 0; i < sourceByteLen; i++) + if (uint8[i] >= 128) { + firstNonAsciiPos = i; + break; + } + } return deserialize(uint32[536870900]); } @@ -4822,11 +4830,12 @@ function deserializeStr(pos) { len = uint32[pos32 + 2]; if (len === 0) return ""; pos = uint32[pos32]; - if (sourceIsAscii && pos < sourceEndPos) return sourceText.substr(pos, len); - // Longer strings use `TextDecoder` - // TODO: Find best switch-over point + if (pos < sourceEndPos && (sourceIsAscii || pos + len <= firstNonAsciiPos)) + return sourceText.substr(pos, len); + // Use `TextDecoder` for strings longer than 9 bytes. + // For shorter strings, the byte-by-byte loop below avoids native call overhead. let end = pos + len; - if (len > 50) return decodeStr(uint8.subarray(pos, end)); + if (len > 9) return decodeStr(uint8.subarray(pos, end)); // Shorter strings decode by hand to avoid native call let out = "", c; diff --git a/napi/parser/src-js/generated/deserialize/ts_parent.js b/napi/parser/src-js/generated/deserialize/ts_parent.js index 14af741462fd8..9f1c16475f531 100644 --- a/napi/parser/src-js/generated/deserialize/ts_parent.js +++ b/napi/parser/src-js/generated/deserialize/ts_parent.js @@ -7,6 +7,7 @@ let uint8, sourceText, sourceIsAscii, sourceEndPos, + firstNonAsciiPos, parent = null; const textDecoder = new TextDecoder("utf-8", { ignoreBOM: true }), @@ -26,6 +27,14 @@ function deserializeWith(buffer, sourceTextInput, sourceByteLen, getLocInput, de float64 = buffer.float64; sourceText = sourceTextInput; sourceIsAscii = sourceText.length === sourceByteLen; + if (!sourceIsAscii) { + firstNonAsciiPos = sourceByteLen; + for (let i = 0; i < sourceByteLen; i++) + if (uint8[i] >= 128) { + firstNonAsciiPos = i; + break; + } + } return deserialize(uint32[536870900]); } @@ -5385,11 +5394,12 @@ function deserializeStr(pos) { len = uint32[pos32 + 2]; if (len === 0) return ""; pos = uint32[pos32]; - if (sourceIsAscii && pos < sourceEndPos) return sourceText.substr(pos, len); - // Longer strings use `TextDecoder` - // TODO: Find best switch-over point + if (pos < sourceEndPos && (sourceIsAscii || pos + len <= firstNonAsciiPos)) + return sourceText.substr(pos, len); + // Use `TextDecoder` for strings longer than 9 bytes. + // For shorter strings, the byte-by-byte loop below avoids native call overhead. let end = pos + len; - if (len > 50) return decodeStr(uint8.subarray(pos, end)); + if (len > 9) return decodeStr(uint8.subarray(pos, end)); // Shorter strings decode by hand to avoid native call let out = "", c; diff --git a/napi/parser/src-js/generated/deserialize/ts_range.js b/napi/parser/src-js/generated/deserialize/ts_range.js index 52b146779a9b5..da5c68d3dc99d 100644 --- a/napi/parser/src-js/generated/deserialize/ts_range.js +++ b/napi/parser/src-js/generated/deserialize/ts_range.js @@ -1,7 +1,7 @@ // Auto-generated code, DO NOT EDIT DIRECTLY! // To edit this generated file you have to edit `tasks/ast_tools/src/generators/raw_transfer.rs`. -let uint8, uint32, float64, sourceText, sourceIsAscii, sourceEndPos; +let uint8, uint32, float64, sourceText, sourceIsAscii, sourceEndPos, firstNonAsciiPos; const textDecoder = new TextDecoder("utf-8", { ignoreBOM: true }), decodeStr = textDecoder.decode.bind(textDecoder), @@ -20,6 +20,14 @@ function deserializeWith(buffer, sourceTextInput, sourceByteLen, getLocInput, de float64 = buffer.float64; sourceText = sourceTextInput; sourceIsAscii = sourceText.length === sourceByteLen; + if (!sourceIsAscii) { + firstNonAsciiPos = sourceByteLen; + for (let i = 0; i < sourceByteLen; i++) + if (uint8[i] >= 128) { + firstNonAsciiPos = i; + break; + } + } return deserialize(uint32[536870900]); } @@ -5403,11 +5411,12 @@ function deserializeStr(pos) { len = uint32[pos32 + 2]; if (len === 0) return ""; pos = uint32[pos32]; - if (sourceIsAscii && pos < sourceEndPos) return sourceText.substr(pos, len); - // Longer strings use `TextDecoder` - // TODO: Find best switch-over point + if (pos < sourceEndPos && (sourceIsAscii || pos + len <= firstNonAsciiPos)) + return sourceText.substr(pos, len); + // Use `TextDecoder` for strings longer than 9 bytes. + // For shorter strings, the byte-by-byte loop below avoids native call overhead. let end = pos + len; - if (len > 50) return decodeStr(uint8.subarray(pos, end)); + if (len > 9) return decodeStr(uint8.subarray(pos, end)); // Shorter strings decode by hand to avoid native call let out = "", c; diff --git a/napi/parser/src-js/generated/deserialize/ts_range_parent.js b/napi/parser/src-js/generated/deserialize/ts_range_parent.js index 9849a612e8e0a..944adad33b26e 100644 --- a/napi/parser/src-js/generated/deserialize/ts_range_parent.js +++ b/napi/parser/src-js/generated/deserialize/ts_range_parent.js @@ -7,6 +7,7 @@ let uint8, sourceText, sourceIsAscii, sourceEndPos, + firstNonAsciiPos, parent = null; const textDecoder = new TextDecoder("utf-8", { ignoreBOM: true }), @@ -26,6 +27,14 @@ function deserializeWith(buffer, sourceTextInput, sourceByteLen, getLocInput, de float64 = buffer.float64; sourceText = sourceTextInput; sourceIsAscii = sourceText.length === sourceByteLen; + if (!sourceIsAscii) { + firstNonAsciiPos = sourceByteLen; + for (let i = 0; i < sourceByteLen; i++) + if (uint8[i] >= 128) { + firstNonAsciiPos = i; + break; + } + } return deserialize(uint32[536870900]); } @@ -5966,11 +5975,12 @@ function deserializeStr(pos) { len = uint32[pos32 + 2]; if (len === 0) return ""; pos = uint32[pos32]; - if (sourceIsAscii && pos < sourceEndPos) return sourceText.substr(pos, len); - // Longer strings use `TextDecoder` - // TODO: Find best switch-over point + if (pos < sourceEndPos && (sourceIsAscii || pos + len <= firstNonAsciiPos)) + return sourceText.substr(pos, len); + // Use `TextDecoder` for strings longer than 9 bytes. + // For shorter strings, the byte-by-byte loop below avoids native call overhead. let end = pos + len; - if (len > 50) return decodeStr(uint8.subarray(pos, end)); + if (len > 9) return decodeStr(uint8.subarray(pos, end)); // Shorter strings decode by hand to avoid native call let out = "", c; diff --git a/tasks/ast_tools/src/generators/raw_transfer.rs b/tasks/ast_tools/src/generators/raw_transfer.rs index 7feffc9bb8e65..b024b6239aa0e 100644 --- a/tasks/ast_tools/src/generators/raw_transfer.rs +++ b/tasks/ast_tools/src/generators/raw_transfer.rs @@ -143,7 +143,7 @@ fn generate_deserializers( import {{ comments, initComments }} from '../plugins/comments.js'; /* END_IF */ - let uint8, uint32, float64, sourceText, sourceIsAscii, sourceStartPos, sourceEndPos; + let uint8, uint32, float64, sourceText, sourceIsAscii, sourceStartPos, sourceEndPos, firstNonAsciiPos; let parent = null; let getLoc; @@ -188,6 +188,23 @@ fn generate_deserializers( sourceText = sourceTextInput; sourceIsAscii = sourceText.length === sourceByteLen; + if (!sourceIsAscii) {{ + // Find first non-ASCII byte in source region. + // `sourceText.substr()` can be used for strings ending before this position, + // since byte offsets equal char offsets in the all-ASCII prefix. + if (LINTER) {{ + firstNonAsciiPos = sourceByteLen; + for (let i = sourceStartPos, e = sourceStartPos + sourceByteLen; i < e; i++) {{ + if (uint8[i] >= 128) {{ firstNonAsciiPos = i - sourceStartPos; break; }} + }} + }} else {{ + firstNonAsciiPos = sourceByteLen; + for (let i = 0; i < sourceByteLen; i++) {{ + if (uint8[i] >= 128) {{ firstNonAsciiPos = i; break; }} + }} + }} + }} + if (LOC) getLoc = getLocInput; return deserialize(uint32[{data_pointer_pos_32}]); @@ -874,15 +891,17 @@ static STR_DESERIALIZER_BODY: &str = " pos = uint32[pos32]; if (LINTER) { - if (sourceIsAscii && pos >= sourceStartPos) return sourceText.substr(pos - sourceStartPos, len); + if (pos >= sourceStartPos && (sourceIsAscii || pos - sourceStartPos + len <= firstNonAsciiPos)) + return sourceText.substr(pos - sourceStartPos, len); } else { - if (sourceIsAscii && pos < sourceEndPos) return sourceText.substr(pos, len); + if (pos < sourceEndPos && (sourceIsAscii || pos + len <= firstNonAsciiPos)) + return sourceText.substr(pos, len); } - // Longer strings use `TextDecoder` - // TODO: Find best switch-over point + // Use `TextDecoder` for strings longer than 9 bytes. + // For shorter strings, the byte-by-byte loop below avoids native call overhead. const end = pos + len; - if (len > 50) return decodeStr(uint8.subarray(pos, end)); + if (len > 9) return decodeStr(uint8.subarray(pos, end)); // Shorter strings decode by hand to avoid native call let out = '',