From 5ab8da099f84b3af47466abdd99d5dc86322b2cc Mon Sep 17 00:00:00 2001 From: Josh Tuddenham Date: Mon, 30 Mar 2026 19:47:45 +0100 Subject: [PATCH 01/20] baseline: PR #20834 + firstNonAsciiPos (threshold 9) --- versions/experiment.mjs | 46 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 versions/experiment.mjs diff --git a/versions/experiment.mjs b/versions/experiment.mjs new file mode 100644 index 0000000..6b70c54 --- /dev/null +++ b/versions/experiment.mjs @@ -0,0 +1,46 @@ +/** + * PR #20834 + `firstNonAsciiPos`: extends `substr` fast path into non-ASCII sources. + */ + +// oxlint-disable prefer-const + +const textDecoder = new TextDecoder("utf-8", { ignoreBOM: true }), + decodeStr = textDecoder.decode.bind(textDecoder); + +const { fromCodePoint } = String; + +let firstNonAsciiPos; + +export function setup() { + // Find first non-ASCII byte in source region + firstNonAsciiPos = sourceEndPos; + for (let i = 0; i < sourceEndPos; i++) { + if (uint8[i] >= 128) { + firstNonAsciiPos = i; + break; + } + } +} + +export function deserializeStr(pos) { + let pos32 = pos >> 2, + len = uint32[pos32 + 2]; + if (len === 0) return ""; + pos = uint32[pos32]; + if (pos < sourceEndPos && (sourceIsAscii || pos + len <= firstNonAsciiPos)) { + return sourceText.substr(pos, len); + } + let end = pos + len; + if (len > 9) return decodeStr(uint8.subarray(pos, end)); + let out = "", + c; + do { + c = uint8[pos++]; + if (c < 128) out += fromCodePoint(c); + else { + out += decodeStr(uint8.subarray(pos - 1, end)); + break; + } + } while (pos < end); + return out; +} From e1360e2134de860a49ddafa3c354bd67886d3709 Mon Sep 17 00:00:00 2001 From: Josh Tuddenham Date: Mon, 30 Mar 2026 19:48:14 +0100 Subject: [PATCH 02/20] exp1: fromCharCode + inline textDecoder.decode --- versions/experiment.mjs | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/versions/experiment.mjs b/versions/experiment.mjs index 6b70c54..f3fce86 100644 --- a/versions/experiment.mjs +++ b/versions/experiment.mjs @@ -1,18 +1,17 @@ /** - * PR #20834 + `firstNonAsciiPos`: extends `substr` fast path into non-ASCII sources. + * Experiment 1: Use String.fromCharCode instead of fromCodePoint for ASCII, + * and inline TextDecoder.decode to avoid bound function overhead. */ // oxlint-disable prefer-const -const textDecoder = new TextDecoder("utf-8", { ignoreBOM: true }), - decodeStr = textDecoder.decode.bind(textDecoder); +const textDecoder = new TextDecoder("utf-8", { ignoreBOM: true }); -const { fromCodePoint } = String; +const { fromCharCode } = String; let firstNonAsciiPos; export function setup() { - // Find first non-ASCII byte in source region firstNonAsciiPos = sourceEndPos; for (let i = 0; i < sourceEndPos; i++) { if (uint8[i] >= 128) { @@ -31,14 +30,14 @@ export function deserializeStr(pos) { return sourceText.substr(pos, len); } let end = pos + len; - if (len > 9) return decodeStr(uint8.subarray(pos, end)); + if (len > 9) return textDecoder.decode(uint8.subarray(pos, end)); let out = "", c; do { c = uint8[pos++]; - if (c < 128) out += fromCodePoint(c); + if (c < 128) out += fromCharCode(c); else { - out += decodeStr(uint8.subarray(pos - 1, end)); + out += textDecoder.decode(uint8.subarray(pos - 1, end)); break; } } while (pos < end); From cfedd0b9ae29304afb5e713f17d5d493802b0377 Mon Sep 17 00:00:00 2001 From: Josh Tuddenham Date: Mon, 30 Mar 2026 19:49:48 +0100 Subject: [PATCH 03/20] exp4: batch fromCharCode with switch on len --- versions/experiment.mjs | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/versions/experiment.mjs b/versions/experiment.mjs index f3fce86..76e604f 100644 --- a/versions/experiment.mjs +++ b/versions/experiment.mjs @@ -1,6 +1,6 @@ /** - * Experiment 1: Use String.fromCharCode instead of fromCodePoint for ASCII, - * and inline TextDecoder.decode to avoid bound function overhead. + * Experiment 4: For short ASCII strings (<=9 bytes), collect char codes + * and use a single fromCharCode call. Falls back to TextDecoder on non-ASCII. */ // oxlint-disable prefer-const @@ -31,15 +31,24 @@ export function deserializeStr(pos) { } let end = pos + len; if (len > 9) return textDecoder.decode(uint8.subarray(pos, end)); - let out = "", - c; - do { - c = uint8[pos++]; - if (c < 128) out += fromCharCode(c); - else { - out += textDecoder.decode(uint8.subarray(pos - 1, end)); - break; + // Check if all bytes are ASCII first + let allAscii = true; + for (let i = pos; i < end; i++) { + if (uint8[i] >= 128) { allAscii = false; break; } + } + if (allAscii) { + // Single fromCharCode call with all codes + switch (len) { + case 1: return fromCharCode(uint8[pos]); + case 2: return fromCharCode(uint8[pos], uint8[pos+1]); + case 3: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2]); + case 4: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3]); + case 5: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4]); + case 6: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5]); + case 7: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6]); + case 8: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7]); + case 9: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8]); } - } while (pos < end); - return out; + } + return textDecoder.decode(uint8.subarray(pos, end)); } From 695f26936f37d1fbb0b319544bc2580ba0256cd8 Mon Sep 17 00:00:00 2001 From: Josh Tuddenham Date: Mon, 30 Mar 2026 19:50:47 +0100 Subject: [PATCH 04/20] exp6: extend batch fromCharCode to threshold 12 --- versions/experiment.mjs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/versions/experiment.mjs b/versions/experiment.mjs index 76e604f..25f1208 100644 --- a/versions/experiment.mjs +++ b/versions/experiment.mjs @@ -1,6 +1,5 @@ /** - * Experiment 4: For short ASCII strings (<=9 bytes), collect char codes - * and use a single fromCharCode call. Falls back to TextDecoder on non-ASCII. + * Experiment 6: Extend batch fromCharCode switch to threshold 12. */ // oxlint-disable prefer-const @@ -30,14 +29,13 @@ export function deserializeStr(pos) { return sourceText.substr(pos, len); } let end = pos + len; - if (len > 9) return textDecoder.decode(uint8.subarray(pos, end)); + if (len > 12) return textDecoder.decode(uint8.subarray(pos, end)); // Check if all bytes are ASCII first let allAscii = true; for (let i = pos; i < end; i++) { if (uint8[i] >= 128) { allAscii = false; break; } } if (allAscii) { - // Single fromCharCode call with all codes switch (len) { case 1: return fromCharCode(uint8[pos]); case 2: return fromCharCode(uint8[pos], uint8[pos+1]); @@ -48,6 +46,9 @@ export function deserializeStr(pos) { case 7: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6]); case 8: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7]); case 9: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8]); + case 10: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9]); + case 11: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10]); + case 12: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11]); } } return textDecoder.decode(uint8.subarray(pos, end)); From e4b31fbd8b4689f4da98e4acb6156609b9e53fb3 Mon Sep 17 00:00:00 2001 From: Josh Tuddenham Date: Mon, 30 Mar 2026 19:51:19 +0100 Subject: [PATCH 05/20] exp7: extend batch fromCharCode to threshold 16 --- versions/experiment.mjs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/versions/experiment.mjs b/versions/experiment.mjs index 25f1208..fb2f82d 100644 --- a/versions/experiment.mjs +++ b/versions/experiment.mjs @@ -1,5 +1,5 @@ /** - * Experiment 6: Extend batch fromCharCode switch to threshold 12. + * Experiment 7: Extend batch fromCharCode to threshold 16. */ // oxlint-disable prefer-const @@ -29,7 +29,7 @@ export function deserializeStr(pos) { return sourceText.substr(pos, len); } let end = pos + len; - if (len > 12) return textDecoder.decode(uint8.subarray(pos, end)); + if (len > 16) return textDecoder.decode(uint8.subarray(pos, end)); // Check if all bytes are ASCII first let allAscii = true; for (let i = pos; i < end; i++) { @@ -49,6 +49,10 @@ export function deserializeStr(pos) { case 10: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9]); case 11: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10]); case 12: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11]); + case 13: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12]); + case 14: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13]); + case 15: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14]); + case 16: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15]); } } return textDecoder.decode(uint8.subarray(pos, end)); From 842dce4cb5145a3742477d8f7be3ee06300d2f0a Mon Sep 17 00:00:00 2001 From: Josh Tuddenham Date: Mon, 30 Mar 2026 19:52:01 +0100 Subject: [PATCH 06/20] exp8: extend batch fromCharCode to threshold 24 --- versions/experiment.mjs | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/versions/experiment.mjs b/versions/experiment.mjs index fb2f82d..a4002f5 100644 --- a/versions/experiment.mjs +++ b/versions/experiment.mjs @@ -1,5 +1,5 @@ /** - * Experiment 7: Extend batch fromCharCode to threshold 16. + * Experiment 8: Extend batch fromCharCode to threshold 24. */ // oxlint-disable prefer-const @@ -29,7 +29,7 @@ export function deserializeStr(pos) { return sourceText.substr(pos, len); } let end = pos + len; - if (len > 16) return textDecoder.decode(uint8.subarray(pos, end)); + if (len > 24) return textDecoder.decode(uint8.subarray(pos, end)); // Check if all bytes are ASCII first let allAscii = true; for (let i = pos; i < end; i++) { @@ -53,6 +53,14 @@ export function deserializeStr(pos) { case 14: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13]); case 15: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14]); case 16: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15]); + case 17: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16]); + case 18: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17]); + case 19: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18]); + case 20: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19]); + case 21: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20]); + case 22: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21]); + case 23: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21], uint8[pos+22]); + case 24: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21], uint8[pos+22], uint8[pos+23]); } } return textDecoder.decode(uint8.subarray(pos, end)); From 3ef3cd0cebfbf927560fcb3a7d4fcca52f3510c6 Mon Sep 17 00:00:00 2001 From: Josh Tuddenham Date: Mon, 30 Mar 2026 19:53:35 +0100 Subject: [PATCH 07/20] exp10: extend batch fromCharCode to threshold 32 --- versions/experiment.mjs | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/versions/experiment.mjs b/versions/experiment.mjs index a4002f5..7e16f5b 100644 --- a/versions/experiment.mjs +++ b/versions/experiment.mjs @@ -1,5 +1,5 @@ /** - * Experiment 8: Extend batch fromCharCode to threshold 24. + * Experiment 10: Extend batch fromCharCode to threshold 32. */ // oxlint-disable prefer-const @@ -29,7 +29,7 @@ export function deserializeStr(pos) { return sourceText.substr(pos, len); } let end = pos + len; - if (len > 24) return textDecoder.decode(uint8.subarray(pos, end)); + if (len > 32) return textDecoder.decode(uint8.subarray(pos, end)); // Check if all bytes are ASCII first let allAscii = true; for (let i = pos; i < end; i++) { @@ -61,6 +61,14 @@ export function deserializeStr(pos) { case 22: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21]); case 23: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21], uint8[pos+22]); case 24: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21], uint8[pos+22], uint8[pos+23]); + case 25: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21], uint8[pos+22], uint8[pos+23], uint8[pos+24]); + case 26: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21], uint8[pos+22], uint8[pos+23], uint8[pos+24], uint8[pos+25]); + case 27: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21], uint8[pos+22], uint8[pos+23], uint8[pos+24], uint8[pos+25], uint8[pos+26]); + case 28: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21], uint8[pos+22], uint8[pos+23], uint8[pos+24], uint8[pos+25], uint8[pos+26], uint8[pos+27]); + case 29: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21], uint8[pos+22], uint8[pos+23], uint8[pos+24], uint8[pos+25], uint8[pos+26], uint8[pos+27], uint8[pos+28]); + case 30: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21], uint8[pos+22], uint8[pos+23], uint8[pos+24], uint8[pos+25], uint8[pos+26], uint8[pos+27], uint8[pos+28], uint8[pos+29]); + case 31: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21], uint8[pos+22], uint8[pos+23], uint8[pos+24], uint8[pos+25], uint8[pos+26], uint8[pos+27], uint8[pos+28], uint8[pos+29], uint8[pos+30]); + case 32: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21], uint8[pos+22], uint8[pos+23], uint8[pos+24], uint8[pos+25], uint8[pos+26], uint8[pos+27], uint8[pos+28], uint8[pos+29], uint8[pos+30], uint8[pos+31]); } } return textDecoder.decode(uint8.subarray(pos, end)); From e08bc8dbea87a99d9752cdfd0dfe75545d6937ee Mon Sep 17 00:00:00 2001 From: Josh Tuddenham Date: Mon, 30 Mar 2026 19:57:24 +0100 Subject: [PATCH 08/20] exp13: extend batch fromCharCode to threshold 48 --- versions/experiment.mjs | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/versions/experiment.mjs b/versions/experiment.mjs index 7e16f5b..58aefb7 100644 --- a/versions/experiment.mjs +++ b/versions/experiment.mjs @@ -1,5 +1,5 @@ /** - * Experiment 10: Extend batch fromCharCode to threshold 32. + * Experiment 13: Extend batch fromCharCode to threshold 48. */ // oxlint-disable prefer-const @@ -29,7 +29,7 @@ export function deserializeStr(pos) { return sourceText.substr(pos, len); } let end = pos + len; - if (len > 32) return textDecoder.decode(uint8.subarray(pos, end)); + if (len > 48) return textDecoder.decode(uint8.subarray(pos, end)); // Check if all bytes are ASCII first let allAscii = true; for (let i = pos; i < end; i++) { @@ -69,6 +69,22 @@ export function deserializeStr(pos) { case 30: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21], uint8[pos+22], uint8[pos+23], uint8[pos+24], uint8[pos+25], uint8[pos+26], uint8[pos+27], uint8[pos+28], uint8[pos+29]); case 31: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21], uint8[pos+22], uint8[pos+23], uint8[pos+24], uint8[pos+25], uint8[pos+26], uint8[pos+27], uint8[pos+28], uint8[pos+29], uint8[pos+30]); case 32: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21], uint8[pos+22], uint8[pos+23], uint8[pos+24], uint8[pos+25], uint8[pos+26], uint8[pos+27], uint8[pos+28], uint8[pos+29], uint8[pos+30], uint8[pos+31]); + case 33: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21], uint8[pos+22], uint8[pos+23], uint8[pos+24], uint8[pos+25], uint8[pos+26], uint8[pos+27], uint8[pos+28], uint8[pos+29], uint8[pos+30], uint8[pos+31], uint8[pos+32]); + case 34: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21], uint8[pos+22], uint8[pos+23], uint8[pos+24], uint8[pos+25], uint8[pos+26], uint8[pos+27], uint8[pos+28], uint8[pos+29], uint8[pos+30], uint8[pos+31], uint8[pos+32], uint8[pos+33]); + case 35: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21], uint8[pos+22], uint8[pos+23], uint8[pos+24], uint8[pos+25], uint8[pos+26], uint8[pos+27], uint8[pos+28], uint8[pos+29], uint8[pos+30], uint8[pos+31], uint8[pos+32], uint8[pos+33], uint8[pos+34]); + case 36: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21], uint8[pos+22], uint8[pos+23], uint8[pos+24], uint8[pos+25], uint8[pos+26], uint8[pos+27], uint8[pos+28], uint8[pos+29], uint8[pos+30], uint8[pos+31], uint8[pos+32], uint8[pos+33], uint8[pos+34], uint8[pos+35]); + case 37: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21], uint8[pos+22], uint8[pos+23], uint8[pos+24], uint8[pos+25], uint8[pos+26], uint8[pos+27], uint8[pos+28], uint8[pos+29], uint8[pos+30], uint8[pos+31], uint8[pos+32], uint8[pos+33], uint8[pos+34], uint8[pos+35], uint8[pos+36]); + case 38: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21], uint8[pos+22], uint8[pos+23], uint8[pos+24], uint8[pos+25], uint8[pos+26], uint8[pos+27], uint8[pos+28], uint8[pos+29], uint8[pos+30], uint8[pos+31], uint8[pos+32], uint8[pos+33], uint8[pos+34], uint8[pos+35], uint8[pos+36], uint8[pos+37]); + case 39: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21], uint8[pos+22], uint8[pos+23], uint8[pos+24], uint8[pos+25], uint8[pos+26], uint8[pos+27], uint8[pos+28], uint8[pos+29], uint8[pos+30], uint8[pos+31], uint8[pos+32], uint8[pos+33], uint8[pos+34], uint8[pos+35], uint8[pos+36], uint8[pos+37], uint8[pos+38]); + case 40: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21], uint8[pos+22], uint8[pos+23], uint8[pos+24], uint8[pos+25], uint8[pos+26], uint8[pos+27], uint8[pos+28], uint8[pos+29], uint8[pos+30], uint8[pos+31], uint8[pos+32], uint8[pos+33], uint8[pos+34], uint8[pos+35], uint8[pos+36], uint8[pos+37], uint8[pos+38], uint8[pos+39]); + case 41: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21], uint8[pos+22], uint8[pos+23], uint8[pos+24], uint8[pos+25], uint8[pos+26], uint8[pos+27], uint8[pos+28], uint8[pos+29], uint8[pos+30], uint8[pos+31], uint8[pos+32], uint8[pos+33], uint8[pos+34], uint8[pos+35], uint8[pos+36], uint8[pos+37], uint8[pos+38], uint8[pos+39], uint8[pos+40]); + case 42: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21], uint8[pos+22], uint8[pos+23], uint8[pos+24], uint8[pos+25], uint8[pos+26], uint8[pos+27], uint8[pos+28], uint8[pos+29], uint8[pos+30], uint8[pos+31], uint8[pos+32], uint8[pos+33], uint8[pos+34], uint8[pos+35], uint8[pos+36], uint8[pos+37], uint8[pos+38], uint8[pos+39], uint8[pos+40], uint8[pos+41]); + case 43: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21], uint8[pos+22], uint8[pos+23], uint8[pos+24], uint8[pos+25], uint8[pos+26], uint8[pos+27], uint8[pos+28], uint8[pos+29], uint8[pos+30], uint8[pos+31], uint8[pos+32], uint8[pos+33], uint8[pos+34], uint8[pos+35], uint8[pos+36], uint8[pos+37], uint8[pos+38], uint8[pos+39], uint8[pos+40], uint8[pos+41], uint8[pos+42]); + case 44: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21], uint8[pos+22], uint8[pos+23], uint8[pos+24], uint8[pos+25], uint8[pos+26], uint8[pos+27], uint8[pos+28], uint8[pos+29], uint8[pos+30], uint8[pos+31], uint8[pos+32], uint8[pos+33], uint8[pos+34], uint8[pos+35], uint8[pos+36], uint8[pos+37], uint8[pos+38], uint8[pos+39], uint8[pos+40], uint8[pos+41], uint8[pos+42], uint8[pos+43]); + case 45: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21], uint8[pos+22], uint8[pos+23], uint8[pos+24], uint8[pos+25], uint8[pos+26], uint8[pos+27], uint8[pos+28], uint8[pos+29], uint8[pos+30], uint8[pos+31], uint8[pos+32], uint8[pos+33], uint8[pos+34], uint8[pos+35], uint8[pos+36], uint8[pos+37], uint8[pos+38], uint8[pos+39], uint8[pos+40], uint8[pos+41], uint8[pos+42], uint8[pos+43], uint8[pos+44]); + case 46: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21], uint8[pos+22], uint8[pos+23], uint8[pos+24], uint8[pos+25], uint8[pos+26], uint8[pos+27], uint8[pos+28], uint8[pos+29], uint8[pos+30], uint8[pos+31], uint8[pos+32], uint8[pos+33], uint8[pos+34], uint8[pos+35], uint8[pos+36], uint8[pos+37], uint8[pos+38], uint8[pos+39], uint8[pos+40], uint8[pos+41], uint8[pos+42], uint8[pos+43], uint8[pos+44], uint8[pos+45]); + case 47: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21], uint8[pos+22], uint8[pos+23], uint8[pos+24], uint8[pos+25], uint8[pos+26], uint8[pos+27], uint8[pos+28], uint8[pos+29], uint8[pos+30], uint8[pos+31], uint8[pos+32], uint8[pos+33], uint8[pos+34], uint8[pos+35], uint8[pos+36], uint8[pos+37], uint8[pos+38], uint8[pos+39], uint8[pos+40], uint8[pos+41], uint8[pos+42], uint8[pos+43], uint8[pos+44], uint8[pos+45], uint8[pos+46]); + case 48: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21], uint8[pos+22], uint8[pos+23], uint8[pos+24], uint8[pos+25], uint8[pos+26], uint8[pos+27], uint8[pos+28], uint8[pos+29], uint8[pos+30], uint8[pos+31], uint8[pos+32], uint8[pos+33], uint8[pos+34], uint8[pos+35], uint8[pos+36], uint8[pos+37], uint8[pos+38], uint8[pos+39], uint8[pos+40], uint8[pos+41], uint8[pos+42], uint8[pos+43], uint8[pos+44], uint8[pos+45], uint8[pos+46], uint8[pos+47]); } } return textDecoder.decode(uint8.subarray(pos, end)); From 04fdac888620f30563f884837a9663ba72302974 Mon Sep 17 00:00:00 2001 From: Josh Tuddenham Date: Mon, 30 Mar 2026 19:59:06 +0100 Subject: [PATCH 09/20] exp15: pre-decode buffer as latin1, substr for ASCII strings --- versions/experiment.mjs | 66 +++++++---------------------------------- 1 file changed, 11 insertions(+), 55 deletions(-) diff --git a/versions/experiment.mjs b/versions/experiment.mjs index 58aefb7..d431aff 100644 --- a/versions/experiment.mjs +++ b/versions/experiment.mjs @@ -1,5 +1,7 @@ /** - * Experiment 13: Extend batch fromCharCode to threshold 48. + * Experiment 15: Pre-decode the entire buffer as ASCII in setup(). + * For any ASCII string, use substr from the pre-decoded string. + * Only fall back to TextDecoder for non-ASCII. */ // oxlint-disable prefer-const @@ -9,6 +11,7 @@ const textDecoder = new TextDecoder("utf-8", { ignoreBOM: true }); const { fromCharCode } = String; let firstNonAsciiPos; +let bufferAsAscii; export function setup() { firstNonAsciiPos = sourceEndPos; @@ -18,6 +21,11 @@ export function setup() { break; } } + // Pre-decode entire buffer as latin1 (bytes as char codes). + // For ASCII bytes this gives the correct string. + // Build using TextDecoder('latin1') which is extremely fast. + const latin1Decoder = new TextDecoder("latin1"); + bufferAsAscii = latin1Decoder.decode(uint8); } export function deserializeStr(pos) { @@ -29,63 +37,11 @@ export function deserializeStr(pos) { return sourceText.substr(pos, len); } let end = pos + len; - if (len > 48) return textDecoder.decode(uint8.subarray(pos, end)); - // Check if all bytes are ASCII first + // Check if all bytes are ASCII let allAscii = true; for (let i = pos; i < end; i++) { if (uint8[i] >= 128) { allAscii = false; break; } } - if (allAscii) { - switch (len) { - case 1: return fromCharCode(uint8[pos]); - case 2: return fromCharCode(uint8[pos], uint8[pos+1]); - case 3: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2]); - case 4: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3]); - case 5: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4]); - case 6: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5]); - case 7: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6]); - case 8: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7]); - case 9: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8]); - case 10: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9]); - case 11: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10]); - case 12: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11]); - case 13: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12]); - case 14: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13]); - case 15: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14]); - case 16: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15]); - case 17: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16]); - case 18: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17]); - case 19: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18]); - case 20: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19]); - case 21: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20]); - case 22: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21]); - case 23: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21], uint8[pos+22]); - case 24: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21], uint8[pos+22], uint8[pos+23]); - case 25: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21], uint8[pos+22], uint8[pos+23], uint8[pos+24]); - case 26: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21], uint8[pos+22], uint8[pos+23], uint8[pos+24], uint8[pos+25]); - case 27: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21], uint8[pos+22], uint8[pos+23], uint8[pos+24], uint8[pos+25], uint8[pos+26]); - case 28: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21], uint8[pos+22], uint8[pos+23], uint8[pos+24], uint8[pos+25], uint8[pos+26], uint8[pos+27]); - case 29: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21], uint8[pos+22], uint8[pos+23], uint8[pos+24], uint8[pos+25], uint8[pos+26], uint8[pos+27], uint8[pos+28]); - case 30: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21], uint8[pos+22], uint8[pos+23], uint8[pos+24], uint8[pos+25], uint8[pos+26], uint8[pos+27], uint8[pos+28], uint8[pos+29]); - case 31: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21], uint8[pos+22], uint8[pos+23], uint8[pos+24], uint8[pos+25], uint8[pos+26], uint8[pos+27], uint8[pos+28], uint8[pos+29], uint8[pos+30]); - case 32: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21], uint8[pos+22], uint8[pos+23], uint8[pos+24], uint8[pos+25], uint8[pos+26], uint8[pos+27], uint8[pos+28], uint8[pos+29], uint8[pos+30], uint8[pos+31]); - case 33: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21], uint8[pos+22], uint8[pos+23], uint8[pos+24], uint8[pos+25], uint8[pos+26], uint8[pos+27], uint8[pos+28], uint8[pos+29], uint8[pos+30], uint8[pos+31], uint8[pos+32]); - case 34: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21], uint8[pos+22], uint8[pos+23], uint8[pos+24], uint8[pos+25], uint8[pos+26], uint8[pos+27], uint8[pos+28], uint8[pos+29], uint8[pos+30], uint8[pos+31], uint8[pos+32], uint8[pos+33]); - case 35: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21], uint8[pos+22], uint8[pos+23], uint8[pos+24], uint8[pos+25], uint8[pos+26], uint8[pos+27], uint8[pos+28], uint8[pos+29], uint8[pos+30], uint8[pos+31], uint8[pos+32], uint8[pos+33], uint8[pos+34]); - case 36: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21], uint8[pos+22], uint8[pos+23], uint8[pos+24], uint8[pos+25], uint8[pos+26], uint8[pos+27], uint8[pos+28], uint8[pos+29], uint8[pos+30], uint8[pos+31], uint8[pos+32], uint8[pos+33], uint8[pos+34], uint8[pos+35]); - case 37: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21], uint8[pos+22], uint8[pos+23], uint8[pos+24], uint8[pos+25], uint8[pos+26], uint8[pos+27], uint8[pos+28], uint8[pos+29], uint8[pos+30], uint8[pos+31], uint8[pos+32], uint8[pos+33], uint8[pos+34], uint8[pos+35], uint8[pos+36]); - case 38: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21], uint8[pos+22], uint8[pos+23], uint8[pos+24], uint8[pos+25], uint8[pos+26], uint8[pos+27], uint8[pos+28], uint8[pos+29], uint8[pos+30], uint8[pos+31], uint8[pos+32], uint8[pos+33], uint8[pos+34], uint8[pos+35], uint8[pos+36], uint8[pos+37]); - case 39: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21], uint8[pos+22], uint8[pos+23], uint8[pos+24], uint8[pos+25], uint8[pos+26], uint8[pos+27], uint8[pos+28], uint8[pos+29], uint8[pos+30], uint8[pos+31], uint8[pos+32], uint8[pos+33], uint8[pos+34], uint8[pos+35], uint8[pos+36], uint8[pos+37], uint8[pos+38]); - case 40: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21], uint8[pos+22], uint8[pos+23], uint8[pos+24], uint8[pos+25], uint8[pos+26], uint8[pos+27], uint8[pos+28], uint8[pos+29], uint8[pos+30], uint8[pos+31], uint8[pos+32], uint8[pos+33], uint8[pos+34], uint8[pos+35], uint8[pos+36], uint8[pos+37], uint8[pos+38], uint8[pos+39]); - case 41: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21], uint8[pos+22], uint8[pos+23], uint8[pos+24], uint8[pos+25], uint8[pos+26], uint8[pos+27], uint8[pos+28], uint8[pos+29], uint8[pos+30], uint8[pos+31], uint8[pos+32], uint8[pos+33], uint8[pos+34], uint8[pos+35], uint8[pos+36], uint8[pos+37], uint8[pos+38], uint8[pos+39], uint8[pos+40]); - case 42: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21], uint8[pos+22], uint8[pos+23], uint8[pos+24], uint8[pos+25], uint8[pos+26], uint8[pos+27], uint8[pos+28], uint8[pos+29], uint8[pos+30], uint8[pos+31], uint8[pos+32], uint8[pos+33], uint8[pos+34], uint8[pos+35], uint8[pos+36], uint8[pos+37], uint8[pos+38], uint8[pos+39], uint8[pos+40], uint8[pos+41]); - case 43: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21], uint8[pos+22], uint8[pos+23], uint8[pos+24], uint8[pos+25], uint8[pos+26], uint8[pos+27], uint8[pos+28], uint8[pos+29], uint8[pos+30], uint8[pos+31], uint8[pos+32], uint8[pos+33], uint8[pos+34], uint8[pos+35], uint8[pos+36], uint8[pos+37], uint8[pos+38], uint8[pos+39], uint8[pos+40], uint8[pos+41], uint8[pos+42]); - case 44: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21], uint8[pos+22], uint8[pos+23], uint8[pos+24], uint8[pos+25], uint8[pos+26], uint8[pos+27], uint8[pos+28], uint8[pos+29], uint8[pos+30], uint8[pos+31], uint8[pos+32], uint8[pos+33], uint8[pos+34], uint8[pos+35], uint8[pos+36], uint8[pos+37], uint8[pos+38], uint8[pos+39], uint8[pos+40], uint8[pos+41], uint8[pos+42], uint8[pos+43]); - case 45: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21], uint8[pos+22], uint8[pos+23], uint8[pos+24], uint8[pos+25], uint8[pos+26], uint8[pos+27], uint8[pos+28], uint8[pos+29], uint8[pos+30], uint8[pos+31], uint8[pos+32], uint8[pos+33], uint8[pos+34], uint8[pos+35], uint8[pos+36], uint8[pos+37], uint8[pos+38], uint8[pos+39], uint8[pos+40], uint8[pos+41], uint8[pos+42], uint8[pos+43], uint8[pos+44]); - case 46: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21], uint8[pos+22], uint8[pos+23], uint8[pos+24], uint8[pos+25], uint8[pos+26], uint8[pos+27], uint8[pos+28], uint8[pos+29], uint8[pos+30], uint8[pos+31], uint8[pos+32], uint8[pos+33], uint8[pos+34], uint8[pos+35], uint8[pos+36], uint8[pos+37], uint8[pos+38], uint8[pos+39], uint8[pos+40], uint8[pos+41], uint8[pos+42], uint8[pos+43], uint8[pos+44], uint8[pos+45]); - case 47: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21], uint8[pos+22], uint8[pos+23], uint8[pos+24], uint8[pos+25], uint8[pos+26], uint8[pos+27], uint8[pos+28], uint8[pos+29], uint8[pos+30], uint8[pos+31], uint8[pos+32], uint8[pos+33], uint8[pos+34], uint8[pos+35], uint8[pos+36], uint8[pos+37], uint8[pos+38], uint8[pos+39], uint8[pos+40], uint8[pos+41], uint8[pos+42], uint8[pos+43], uint8[pos+44], uint8[pos+45], uint8[pos+46]); - case 48: return fromCharCode(uint8[pos], uint8[pos+1], uint8[pos+2], uint8[pos+3], uint8[pos+4], uint8[pos+5], uint8[pos+6], uint8[pos+7], uint8[pos+8], uint8[pos+9], uint8[pos+10], uint8[pos+11], uint8[pos+12], uint8[pos+13], uint8[pos+14], uint8[pos+15], uint8[pos+16], uint8[pos+17], uint8[pos+18], uint8[pos+19], uint8[pos+20], uint8[pos+21], uint8[pos+22], uint8[pos+23], uint8[pos+24], uint8[pos+25], uint8[pos+26], uint8[pos+27], uint8[pos+28], uint8[pos+29], uint8[pos+30], uint8[pos+31], uint8[pos+32], uint8[pos+33], uint8[pos+34], uint8[pos+35], uint8[pos+36], uint8[pos+37], uint8[pos+38], uint8[pos+39], uint8[pos+40], uint8[pos+41], uint8[pos+42], uint8[pos+43], uint8[pos+44], uint8[pos+45], uint8[pos+46], uint8[pos+47]); - } - } + if (allAscii) return bufferAsAscii.substr(pos, len); return textDecoder.decode(uint8.subarray(pos, end)); } From 5628183769058ebdcb3cbbc4c17558a58f6c1136 Mon Sep 17 00:00:00 2001 From: Josh Tuddenham Date: Mon, 30 Mar 2026 20:01:41 +0100 Subject: [PATCH 10/20] exp19: strDataIsAscii flag + streamlined branching --- versions/experiment.mjs | 38 +++++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/versions/experiment.mjs b/versions/experiment.mjs index d431aff..9017aec 100644 --- a/versions/experiment.mjs +++ b/versions/experiment.mjs @@ -1,17 +1,18 @@ /** - * Experiment 15: Pre-decode the entire buffer as ASCII in setup(). - * For any ASCII string, use substr from the pre-decoded string. - * Only fall back to TextDecoder for non-ASCII. + * Experiment 19: Combine latin1 buffer with extended sourceText fast path. + * Use sourceText.substr for source strings where sourceIsAscii. + * Use bufferAsAscii for non-source ASCII strings (no per-byte check needed + * for files where all non-source content is ASCII). + * Fall back to per-byte check + bufferAsAscii for mixed. */ // oxlint-disable prefer-const const textDecoder = new TextDecoder("utf-8", { ignoreBOM: true }); -const { fromCharCode } = String; - let firstNonAsciiPos; let bufferAsAscii; +let strDataIsAscii; export function setup() { firstNonAsciiPos = sourceEndPos; @@ -21,11 +22,16 @@ export function setup() { break; } } - // Pre-decode entire buffer as latin1 (bytes as char codes). - // For ASCII bytes this gives the correct string. - // Build using TextDecoder('latin1') which is extremely fast. const latin1Decoder = new TextDecoder("latin1"); bufferAsAscii = latin1Decoder.decode(uint8); + // Check if all strData bytes (after source) are ASCII + strDataIsAscii = true; + for (let i = sourceEndPos; i < uint8.length; i++) { + if (uint8[i] >= 128) { + strDataIsAscii = false; + break; + } + } } export function deserializeStr(pos) { @@ -33,15 +39,17 @@ export function deserializeStr(pos) { len = uint32[pos32 + 2]; if (len === 0) return ""; pos = uint32[pos32]; - if (pos < sourceEndPos && (sourceIsAscii || pos + len <= firstNonAsciiPos)) { - return sourceText.substr(pos, len); + if (pos < sourceEndPos) { + if (sourceIsAscii || pos + len <= firstNonAsciiPos) { + return sourceText.substr(pos, len); + } + } else if (strDataIsAscii) { + return bufferAsAscii.substr(pos, len); } let end = pos + len; - // Check if all bytes are ASCII - let allAscii = true; + // Per-byte ASCII check for (let i = pos; i < end; i++) { - if (uint8[i] >= 128) { allAscii = false; break; } + if (uint8[i] >= 128) return textDecoder.decode(uint8.subarray(pos, end)); } - if (allAscii) return bufferAsAscii.substr(pos, len); - return textDecoder.decode(uint8.subarray(pos, end)); + return bufferAsAscii.substr(pos, len); } From ebfbf991596ed640bb8a53e35fbc601a03771c8d Mon Sep 17 00:00:00 2001 From: Josh Tuddenham Date: Mon, 30 Mar 2026 20:06:30 +0100 Subject: [PATCH 11/20] exp24: minimize branching with firstNonAsciiBufPos --- versions/experiment.mjs | 49 +++++++++++++++++++---------------------- 1 file changed, 23 insertions(+), 26 deletions(-) diff --git a/versions/experiment.mjs b/versions/experiment.mjs index 9017aec..55d328d 100644 --- a/versions/experiment.mjs +++ b/versions/experiment.mjs @@ -1,9 +1,7 @@ /** - * Experiment 19: Combine latin1 buffer with extended sourceText fast path. - * Use sourceText.substr for source strings where sourceIsAscii. - * Use bufferAsAscii for non-source ASCII strings (no per-byte check needed - * for files where all non-source content is ASCII). - * Fall back to per-byte check + bufferAsAscii for mixed. + * Experiment 24: Minimize branching. Use bufferAsAscii as default, + * only check for non-ASCII when the string might contain it. + * Pre-compute firstNonAsciiBufPos to cover entire buffer. */ // oxlint-disable prefer-const @@ -11,27 +9,25 @@ const textDecoder = new TextDecoder("utf-8", { ignoreBOM: true }); let firstNonAsciiPos; +let firstNonAsciiBufPos; // first non-ASCII byte in entire buffer let bufferAsAscii; -let strDataIsAscii; export function setup() { firstNonAsciiPos = sourceEndPos; - for (let i = 0; i < sourceEndPos; i++) { + firstNonAsciiBufPos = uint8.length; + for (let i = 0; i < uint8.length; i++) { if (uint8[i] >= 128) { - firstNonAsciiPos = i; - break; + if (i < sourceEndPos && firstNonAsciiPos === sourceEndPos) { + firstNonAsciiPos = i; + } + if (firstNonAsciiBufPos === uint8.length) { + firstNonAsciiBufPos = i; + } + if (firstNonAsciiPos < sourceEndPos) break; } } const latin1Decoder = new TextDecoder("latin1"); bufferAsAscii = latin1Decoder.decode(uint8); - // Check if all strData bytes (after source) are ASCII - strDataIsAscii = true; - for (let i = sourceEndPos; i < uint8.length; i++) { - if (uint8[i] >= 128) { - strDataIsAscii = false; - break; - } - } } export function deserializeStr(pos) { @@ -39,17 +35,18 @@ export function deserializeStr(pos) { len = uint32[pos32 + 2]; if (len === 0) return ""; pos = uint32[pos32]; - if (pos < sourceEndPos) { - if (sourceIsAscii || pos + len <= firstNonAsciiPos) { - return sourceText.substr(pos, len); - } - } else if (strDataIsAscii) { - return bufferAsAscii.substr(pos, len); - } + // Source string in ASCII prefix: use sourceText (handles byte=char offset) + if (pos + len <= firstNonAsciiPos) return sourceText.substr(pos, len); + // String ends before any non-ASCII in buffer: use bufferAsAscii + if (pos + len <= firstNonAsciiBufPos) return bufferAsAscii.substr(pos, len); + // Need to check: might be ASCII (after non-ASCII region) or non-ASCII let end = pos + len; - // Per-byte ASCII check for (let i = pos; i < end; i++) { - if (uint8[i] >= 128) return textDecoder.decode(uint8.subarray(pos, end)); + if (uint8[i] >= 128) { + // Non-ASCII: source strings use sourceText via TextDecoder, others use TextDecoder + return textDecoder.decode(uint8.subarray(pos, end)); + } } + // All ASCII: use bufferAsAscii return bufferAsAscii.substr(pos, len); } From a07cdd49ac87796e8df69c530ea81aac300cb94d Mon Sep 17 00:00:00 2001 From: Josh Tuddenham Date: Mon, 30 Mar 2026 20:06:58 +0100 Subject: [PATCH 12/20] exp24b: fix source boundary + separate source/non-source paths --- versions/experiment.mjs | 51 ++++++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 23 deletions(-) diff --git a/versions/experiment.mjs b/versions/experiment.mjs index 55d328d..95d71ac 100644 --- a/versions/experiment.mjs +++ b/versions/experiment.mjs @@ -1,7 +1,6 @@ /** - * Experiment 24: Minimize branching. Use bufferAsAscii as default, - * only check for non-ASCII when the string might contain it. - * Pre-compute firstNonAsciiBufPos to cover entire buffer. + * Experiment 24b: Fix source boundary handling. Source strings always + * go through sourceText path. Non-source strings use bufferAsAscii. */ // oxlint-disable prefer-const @@ -9,25 +8,26 @@ const textDecoder = new TextDecoder("utf-8", { ignoreBOM: true }); let firstNonAsciiPos; -let firstNonAsciiBufPos; // first non-ASCII byte in entire buffer let bufferAsAscii; +let strDataIsAscii; export function setup() { firstNonAsciiPos = sourceEndPos; - firstNonAsciiBufPos = uint8.length; - for (let i = 0; i < uint8.length; i++) { + for (let i = 0; i < sourceEndPos; i++) { if (uint8[i] >= 128) { - if (i < sourceEndPos && firstNonAsciiPos === sourceEndPos) { - firstNonAsciiPos = i; - } - if (firstNonAsciiBufPos === uint8.length) { - firstNonAsciiBufPos = i; - } - if (firstNonAsciiPos < sourceEndPos) break; + firstNonAsciiPos = i; + break; } } const latin1Decoder = new TextDecoder("latin1"); bufferAsAscii = latin1Decoder.decode(uint8); + strDataIsAscii = true; + for (let i = sourceEndPos; i < uint8.length; i++) { + if (uint8[i] >= 128) { + strDataIsAscii = false; + break; + } + } } export function deserializeStr(pos) { @@ -35,18 +35,23 @@ export function deserializeStr(pos) { len = uint32[pos32 + 2]; if (len === 0) return ""; pos = uint32[pos32]; - // Source string in ASCII prefix: use sourceText (handles byte=char offset) - if (pos + len <= firstNonAsciiPos) return sourceText.substr(pos, len); - // String ends before any non-ASCII in buffer: use bufferAsAscii - if (pos + len <= firstNonAsciiBufPos) return bufferAsAscii.substr(pos, len); - // Need to check: might be ASCII (after non-ASCII region) or non-ASCII + // Source strings + if (pos < sourceEndPos) { + if (sourceIsAscii || pos + len <= firstNonAsciiPos) { + return sourceText.substr(pos, len); + } + // Source string past firstNonAsciiPos - check bytes + let end = pos + len; + for (let i = pos; i < end; i++) { + if (uint8[i] >= 128) return textDecoder.decode(uint8.subarray(pos, end)); + } + return bufferAsAscii.substr(pos, len); + } + // Non-source strings + if (strDataIsAscii) return bufferAsAscii.substr(pos, len); let end = pos + len; for (let i = pos; i < end; i++) { - if (uint8[i] >= 128) { - // Non-ASCII: source strings use sourceText via TextDecoder, others use TextDecoder - return textDecoder.decode(uint8.subarray(pos, end)); - } + if (uint8[i] >= 128) return textDecoder.decode(uint8.subarray(pos, end)); } - // All ASCII: use bufferAsAscii return bufferAsAscii.substr(pos, len); } From 69d3b2d35478b28db6551462714e8019d78ed742 Mon Sep 17 00:00:00 2001 From: Josh Tuddenham Date: Mon, 30 Mar 2026 20:08:13 +0100 Subject: [PATCH 13/20] exp26: add lastNonAsciiSrcEnd for tail-ASCII fast path --- versions/experiment.mjs | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/versions/experiment.mjs b/versions/experiment.mjs index 95d71ac..e1a66b6 100644 --- a/versions/experiment.mjs +++ b/versions/experiment.mjs @@ -1,6 +1,8 @@ /** - * Experiment 24b: Fix source boundary handling. Source strings always - * go through sourceText path. Non-source strings use bufferAsAscii. + * Experiment 26: Add lastNonAsciiSrcEnd to extend fast path. + * Source strings before firstNonAsciiPos: sourceText.substr + * Source strings after lastNonAsciiSrcEnd: bufferAsAscii.substr (no scan) + * Source strings between: per-byte scan */ // oxlint-disable prefer-const @@ -8,15 +10,17 @@ const textDecoder = new TextDecoder("utf-8", { ignoreBOM: true }); let firstNonAsciiPos; +let lastNonAsciiSrcEnd; // byte pos after last non-ASCII byte in source let bufferAsAscii; let strDataIsAscii; export function setup() { firstNonAsciiPos = sourceEndPos; + lastNonAsciiSrcEnd = 0; for (let i = 0; i < sourceEndPos; i++) { if (uint8[i] >= 128) { - firstNonAsciiPos = i; - break; + if (firstNonAsciiPos === sourceEndPos) firstNonAsciiPos = i; + lastNonAsciiSrcEnd = i + 1; } } const latin1Decoder = new TextDecoder("latin1"); @@ -40,7 +44,11 @@ export function deserializeStr(pos) { if (sourceIsAscii || pos + len <= firstNonAsciiPos) { return sourceText.substr(pos, len); } - // Source string past firstNonAsciiPos - check bytes + // After all non-ASCII source bytes: guaranteed ASCII + if (pos >= lastNonAsciiSrcEnd) { + return bufferAsAscii.substr(pos, len); + } + // In the non-ASCII zone: per-byte scan let end = pos + len; for (let i = pos; i < end; i++) { if (uint8[i] >= 128) return textDecoder.decode(uint8.subarray(pos, end)); From 86720dc6369975c036e3ef6138ed05845291dfef Mon Sep 17 00:00:00 2001 From: Josh Tuddenham Date: Mon, 30 Mar 2026 20:09:55 +0100 Subject: [PATCH 14/20] exp29: cumulative non-ASCII count for O(1) ASCII range check --- versions/experiment.mjs | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/versions/experiment.mjs b/versions/experiment.mjs index e1a66b6..847f7eb 100644 --- a/versions/experiment.mjs +++ b/versions/experiment.mjs @@ -1,8 +1,6 @@ /** - * Experiment 26: Add lastNonAsciiSrcEnd to extend fast path. - * Source strings before firstNonAsciiPos: sourceText.substr - * Source strings after lastNonAsciiSrcEnd: bufferAsAscii.substr (no scan) - * Source strings between: per-byte scan + * Experiment 29: Pre-compute cumulative non-ASCII byte count. + * O(1) ASCII check for any range: nonAsciiCum[end] - nonAsciiCum[pos] === 0. */ // oxlint-disable prefer-const @@ -10,9 +8,10 @@ const textDecoder = new TextDecoder("utf-8", { ignoreBOM: true }); let firstNonAsciiPos; -let lastNonAsciiSrcEnd; // byte pos after last non-ASCII byte in source +let lastNonAsciiSrcEnd; let bufferAsAscii; let strDataIsAscii; +let nonAsciiCum; // cumulative non-ASCII count export function setup() { firstNonAsciiPos = sourceEndPos; @@ -32,6 +31,14 @@ export function setup() { break; } } + // Build cumulative non-ASCII count for entire buffer + nonAsciiCum = new Uint32Array(uint8.length + 1); + let count = 0; + for (let i = 0; i < uint8.length; i++) { + nonAsciiCum[i] = count; + if (uint8[i] >= 128) count++; + } + nonAsciiCum[uint8.length] = count; } export function deserializeStr(pos) { @@ -39,27 +46,24 @@ export function deserializeStr(pos) { len = uint32[pos32 + 2]; if (len === 0) return ""; pos = uint32[pos32]; - // Source strings if (pos < sourceEndPos) { if (sourceIsAscii || pos + len <= firstNonAsciiPos) { return sourceText.substr(pos, len); } - // After all non-ASCII source bytes: guaranteed ASCII if (pos >= lastNonAsciiSrcEnd) { return bufferAsAscii.substr(pos, len); } - // In the non-ASCII zone: per-byte scan + // O(1) ASCII check using cumulative count let end = pos + len; - for (let i = pos; i < end; i++) { - if (uint8[i] >= 128) return textDecoder.decode(uint8.subarray(pos, end)); + if (nonAsciiCum[end] === nonAsciiCum[pos]) { + return bufferAsAscii.substr(pos, len); } - return bufferAsAscii.substr(pos, len); + return textDecoder.decode(uint8.subarray(pos, end)); } - // Non-source strings if (strDataIsAscii) return bufferAsAscii.substr(pos, len); let end = pos + len; - for (let i = pos; i < end; i++) { - if (uint8[i] >= 128) return textDecoder.decode(uint8.subarray(pos, end)); + if (nonAsciiCum[end] === nonAsciiCum[pos]) { + return bufferAsAscii.substr(pos, len); } - return bufferAsAscii.substr(pos, len); + return textDecoder.decode(uint8.subarray(pos, end)); } From e274000ae4e3d27bf3933d71fc4d53bbb1ec4ae7 Mon Sep 17 00:00:00 2001 From: Josh Tuddenham Date: Mon, 30 Mar 2026 20:10:31 +0100 Subject: [PATCH 15/20] exp30: simplified - only cumulative count, no first/lastNonAscii --- versions/experiment.mjs | 47 +++++++++++------------------------------ 1 file changed, 12 insertions(+), 35 deletions(-) diff --git a/versions/experiment.mjs b/versions/experiment.mjs index 847f7eb..c9514e6 100644 --- a/versions/experiment.mjs +++ b/versions/experiment.mjs @@ -1,36 +1,18 @@ /** - * Experiment 29: Pre-compute cumulative non-ASCII byte count. - * O(1) ASCII check for any range: nonAsciiCum[end] - nonAsciiCum[pos] === 0. + * Experiment 30: Simplified - remove firstNonAsciiPos/lastNonAsciiSrcEnd. + * Use only cumulative count for all non-sourceIsAscii cases. */ // oxlint-disable prefer-const const textDecoder = new TextDecoder("utf-8", { ignoreBOM: true }); -let firstNonAsciiPos; -let lastNonAsciiSrcEnd; let bufferAsAscii; -let strDataIsAscii; -let nonAsciiCum; // cumulative non-ASCII count +let nonAsciiCum; export function setup() { - firstNonAsciiPos = sourceEndPos; - lastNonAsciiSrcEnd = 0; - for (let i = 0; i < sourceEndPos; i++) { - if (uint8[i] >= 128) { - if (firstNonAsciiPos === sourceEndPos) firstNonAsciiPos = i; - lastNonAsciiSrcEnd = i + 1; - } - } const latin1Decoder = new TextDecoder("latin1"); bufferAsAscii = latin1Decoder.decode(uint8); - strDataIsAscii = true; - for (let i = sourceEndPos; i < uint8.length; i++) { - if (uint8[i] >= 128) { - strDataIsAscii = false; - break; - } - } // Build cumulative non-ASCII count for entire buffer nonAsciiCum = new Uint32Array(uint8.length + 1); let count = 0; @@ -46,24 +28,19 @@ export function deserializeStr(pos) { len = uint32[pos32 + 2]; if (len === 0) return ""; pos = uint32[pos32]; - if (pos < sourceEndPos) { - if (sourceIsAscii || pos + len <= firstNonAsciiPos) { - return sourceText.substr(pos, len); - } - if (pos >= lastNonAsciiSrcEnd) { - return bufferAsAscii.substr(pos, len); - } - // O(1) ASCII check using cumulative count - let end = pos + len; - if (nonAsciiCum[end] === nonAsciiCum[pos]) { - return bufferAsAscii.substr(pos, len); - } - return textDecoder.decode(uint8.subarray(pos, end)); + // Source strings with ASCII source + if (pos < sourceEndPos && sourceIsAscii) { + return sourceText.substr(pos, len); } - if (strDataIsAscii) return bufferAsAscii.substr(pos, len); + // O(1) ASCII check using cumulative count let end = pos + len; if (nonAsciiCum[end] === nonAsciiCum[pos]) { return bufferAsAscii.substr(pos, len); } + // Non-ASCII: need to check if it's a source string + if (pos < sourceEndPos) { + // Use TextDecoder for source strings (sourceText.substr won't work without byteToChar mapping) + return textDecoder.decode(uint8.subarray(pos, end)); + } return textDecoder.decode(uint8.subarray(pos, end)); } From bc3c7bfebfb0fde265fc9b1d37866a2b85794da0 Mon Sep 17 00:00:00 2001 From: Josh Tuddenham Date: Mon, 30 Mar 2026 20:11:10 +0100 Subject: [PATCH 16/20] exp31: maximum simplification - no sourceText, pure cumulative + bufferAsAscii --- versions/experiment.mjs | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/versions/experiment.mjs b/versions/experiment.mjs index c9514e6..58c26e7 100644 --- a/versions/experiment.mjs +++ b/versions/experiment.mjs @@ -1,6 +1,7 @@ /** - * Experiment 30: Simplified - remove firstNonAsciiPos/lastNonAsciiSrcEnd. - * Use only cumulative count for all non-sourceIsAscii cases. + * Experiment 31: Maximum simplification. No sourceText path at all. + * All ASCII strings use bufferAsAscii. All non-ASCII use TextDecoder. + * Only the cumulative count decides. */ // oxlint-disable prefer-const @@ -13,7 +14,6 @@ let nonAsciiCum; export function setup() { const latin1Decoder = new TextDecoder("latin1"); bufferAsAscii = latin1Decoder.decode(uint8); - // Build cumulative non-ASCII count for entire buffer nonAsciiCum = new Uint32Array(uint8.length + 1); let count = 0; for (let i = 0; i < uint8.length; i++) { @@ -28,19 +28,9 @@ export function deserializeStr(pos) { len = uint32[pos32 + 2]; if (len === 0) return ""; pos = uint32[pos32]; - // Source strings with ASCII source - if (pos < sourceEndPos && sourceIsAscii) { - return sourceText.substr(pos, len); - } - // O(1) ASCII check using cumulative count let end = pos + len; if (nonAsciiCum[end] === nonAsciiCum[pos]) { return bufferAsAscii.substr(pos, len); } - // Non-ASCII: need to check if it's a source string - if (pos < sourceEndPos) { - // Use TextDecoder for source strings (sourceText.substr won't work without byteToChar mapping) - return textDecoder.decode(uint8.subarray(pos, end)); - } return textDecoder.decode(uint8.subarray(pos, end)); } From b94307c65a9df9d40952783059342efdce020df0 Mon Sep 17 00:00:00 2001 From: Josh Tuddenham Date: Mon, 30 Mar 2026 20:11:30 +0100 Subject: [PATCH 17/20] exp31b: restore sourceText path for ASCII source boundary strings --- versions/experiment.mjs | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/versions/experiment.mjs b/versions/experiment.mjs index 58c26e7..dafe9ee 100644 --- a/versions/experiment.mjs +++ b/versions/experiment.mjs @@ -1,7 +1,6 @@ /** - * Experiment 31: Maximum simplification. No sourceText path at all. - * All ASCII strings use bufferAsAscii. All non-ASCII use TextDecoder. - * Only the cumulative count decides. + * Experiment 31b: Fix boundary case. Use sourceText.substr for source + * strings in ASCII sources. Use cumulative + bufferAsAscii for the rest. */ // oxlint-disable prefer-const @@ -28,9 +27,8 @@ export function deserializeStr(pos) { len = uint32[pos32 + 2]; if (len === 0) return ""; pos = uint32[pos32]; + if (pos < sourceEndPos && sourceIsAscii) return sourceText.substr(pos, len); let end = pos + len; - if (nonAsciiCum[end] === nonAsciiCum[pos]) { - return bufferAsAscii.substr(pos, len); - } + if (nonAsciiCum[end] === nonAsciiCum[pos]) return bufferAsAscii.substr(pos, len); return textDecoder.decode(uint8.subarray(pos, end)); } From c668176116de3f37021cf9a9672050384809b4e8 Mon Sep 17 00:00:00 2001 From: Josh Tuddenham Date: Mon, 30 Mar 2026 20:12:02 +0100 Subject: [PATCH 18/20] exp32: micro-opts - remove len===0 check, inline pos read --- versions/experiment.mjs | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/versions/experiment.mjs b/versions/experiment.mjs index dafe9ee..90fc850 100644 --- a/versions/experiment.mjs +++ b/versions/experiment.mjs @@ -1,6 +1,6 @@ /** - * Experiment 31b: Fix boundary case. Use sourceText.substr for source - * strings in ASCII sources. Use cumulative + bufferAsAscii for the rest. + * Experiment 32: Micro-optimizations - remove len===0 check, + * use local references for hot variables. */ // oxlint-disable prefer-const @@ -24,11 +24,10 @@ export function setup() { export function deserializeStr(pos) { let pos32 = pos >> 2, - len = uint32[pos32 + 2]; - if (len === 0) return ""; - pos = uint32[pos32]; - if (pos < sourceEndPos && sourceIsAscii) return sourceText.substr(pos, len); - let end = pos + len; - if (nonAsciiCum[end] === nonAsciiCum[pos]) return bufferAsAscii.substr(pos, len); - return textDecoder.decode(uint8.subarray(pos, end)); + len = uint32[pos32 + 2], + p = uint32[pos32]; + if (p < sourceEndPos && sourceIsAscii) return sourceText.substr(p, len); + let end = p + len; + if (nonAsciiCum[end] === nonAsciiCum[p]) return bufferAsAscii.substr(p, len); + return textDecoder.decode(uint8.subarray(p, end)); } From 2a109a17ca1753ca646e9a18123b2402f6e0e45e Mon Sep 17 00:00:00 2001 From: Josh Tuddenham Date: Wed, 1 Apr 2026 10:20:10 +0100 Subject: [PATCH 19/20] add sparse-table version: O(log k) binary search over non-ASCII drift table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Scans the source region once in setup() to build a sparse translation table mapping multi-byte UTF-8 character positions to cumulative byte-vs-codeunit drift. deserializeStr() binary searches this table to convert byte offsets to UTF-16 offsets, extending sourceText.substr() to all source strings — not just those in the ASCII prefix. Benchmarks show 25-65% improvement over current on non-ASCII files, though the dense cumulative array approach (experiment.mjs) remains faster due to O(1) lookups vs O(log k) binary search. --- versions/sparse-table.mjs | 127 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 127 insertions(+) create mode 100644 versions/sparse-table.mjs diff --git a/versions/sparse-table.mjs b/versions/sparse-table.mjs new file mode 100644 index 0000000..4e4bb11 --- /dev/null +++ b/versions/sparse-table.mjs @@ -0,0 +1,127 @@ +/** + * Sparse UTF-8/UTF-16 translation table: extends sourceText.substr() to ALL + * source strings, not just those in the ASCII prefix. + * + * Builds on pr20834-fnap.mjs. Instead of finding the first non-ASCII byte and + * giving up after it, we scan the source region once in setup() and record the + * cumulative byte-vs-codeunit drift at each multi-byte character. A binary + * search at read time converts byte offsets to UTF-16 offsets in O(log k), + * where k is the number of non-ASCII characters (typically tiny for source). + */ + +// oxlint-disable prefer-const + +const textDecoder = new TextDecoder("utf-8", { ignoreBOM: true }), + decodeStr = textDecoder.decode.bind(textDecoder); + +const { fromCodePoint } = String; + +// Sparse table: parallel arrays for cache-friendly binary search. +// tableOffsets[i] = byte offset just past the i-th multi-byte character. +// tableDrifts[i] = cumulative drift (utf8 bytes - utf16 code units) at that point. +let tableOffsets; +let tableDrifts; +let tableLen; + +export function setup() { + // First pass: count multi-byte characters to size the arrays. + let count = 0; + for (let i = 0; i < sourceEndPos; ) { + let b = uint8[i]; + if (b >= 0xf0) { count++; i += 4; } + else if (b >= 0xe0) { count++; i += 3; } + else if (b >= 0xc0) { count++; i += 2; } + else { i++; } + } + + tableOffsets = new Uint32Array(count); + tableDrifts = new Uint32Array(count); + tableLen = count; + + // Second pass: populate the table. + let drift = 0; + let idx = 0; + for (let i = 0; i < sourceEndPos; ) { + let b = uint8[i]; + if (b >= 0xf0) { + // 4-byte sequence -> 2 UTF-16 code units (surrogate pair), drift += 2 + drift += 2; + i += 4; + tableOffsets[idx] = i; + tableDrifts[idx] = drift; + idx++; + } else if (b >= 0xe0) { + // 3-byte sequence -> 1 UTF-16 code unit, drift += 2 + drift += 2; + i += 3; + tableOffsets[idx] = i; + tableDrifts[idx] = drift; + idx++; + } else if (b >= 0xc0) { + // 2-byte sequence -> 1 UTF-16 code unit, drift += 1 + drift += 1; + i += 2; + tableOffsets[idx] = i; + tableDrifts[idx] = drift; + idx++; + } else { + i++; + } + } +} + +// Binary search: find the largest index where tableOffsets[index] <= target. +// Returns -1 if target is before all entries (i.e. drift is 0). +function findDrift(target) { + let lo = 0, hi = tableLen - 1, result = -1; + while (lo <= hi) { + let mid = (lo + hi) >>> 1; + if (tableOffsets[mid] <= target) { + result = mid; + lo = mid + 1; + } else { + hi = mid - 1; + } + } + return result; +} + +// Get the cumulative drift at a byte offset. +function driftAt(bytePos) { + if (tableLen === 0) return 0; + let idx = findDrift(bytePos); + return idx < 0 ? 0 : tableDrifts[idx]; +} + +export function deserializeStr(pos) { + let pos32 = pos >> 2, + len = uint32[pos32 + 2]; + if (len === 0) return ""; + pos = uint32[pos32]; + + if (pos < sourceEndPos) { + if (sourceIsAscii) return sourceText.substr(pos, len); + + // Use the sparse table to convert byte offsets to UTF-16 offsets. + let startDrift = driftAt(pos); + let endDrift = driftAt(pos + len); + let utf16Start = pos - startDrift; + let utf16Len = len - (endDrift - startDrift); + return sourceText.substr(utf16Start, utf16Len); + } + + // Outside source region: fall back to concat/TextDecoder (from pr20834-fnap). + let end = pos + len; + if (len > 9) return decodeStr(uint8.subarray(pos, end)); + let out = "", + c; + do { + c = uint8[pos++]; + if (c < 128) out += fromCodePoint(c); + else { + out += decodeStr(uint8.subarray(pos - 1, end)); + break; + } + } while (pos < end); + return out; +} From 1e03e5a928368af223400085242c4a2c61097cf0 Mon Sep 17 00:00:00 2001 From: Josh Tuddenham Date: Wed, 1 Apr 2026 10:49:01 +0100 Subject: [PATCH 20/20] fix: check entire string is within source region before using substr A string starting in the source region but extending past sourceEndPos would get truncated by sourceText.substr(). Changed the guard from pos < sourceEndPos to pos + len <= sourceEndPos so boundary-spanning strings correctly fall through to the TextDecoder path. --- versions/sparse-table.mjs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/versions/sparse-table.mjs b/versions/sparse-table.mjs index 4e4bb11..ef68fdc 100644 --- a/versions/sparse-table.mjs +++ b/versions/sparse-table.mjs @@ -99,7 +99,7 @@ export function deserializeStr(pos) { if (len === 0) return ""; pos = uint32[pos32]; - if (pos < sourceEndPos) { + if (pos + len <= sourceEndPos) { if (sourceIsAscii) return sourceText.substr(pos, len); // Use the sparse table to convert byte offsets to UTF-16 offsets.