Skip to content

Commit c2d735e

Browse files
mertcanaltin authored and MylesBorins committed
test_runner: parse non-ascii character correctly
PR-URL: #45736 Reviewed-By: Yagiz Nizipli <[email protected]> Reviewed-By: Moshe Atlow <[email protected]>
1 parent 53fe480 commit c2d735e

File tree

3 files changed

+74
-24
lines changed

3 files changed

+74
-24
lines changed

lib/internal/test_runner/tap_lexer.js

+25-13
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,15 @@ const {
55
ArrayPrototypePush,
66
MathMax,
77
SafeSet,
8-
StringPrototypeIncludes,
8+
StringPrototypeCodePointAt,
99
StringPrototypeTrim,
1010
} = primordials;
1111
const {
1212
codes: { ERR_TAP_LEXER_ERROR },
1313
} = require('internal/errors');
1414

15+
const { isZeroWidthCodePoint } = require('internal/util/inspect');
16+
1517
const kEOL = '';
1618
const kEOF = '';
1719

@@ -474,18 +476,28 @@ class TapLexer {
474476
}
475477

476478
#isLiteralSymbol(char) {
477-
return (
478-
(char >= 'a' && char <= 'z') ||
479-
(char >= 'A' && char <= 'Z') ||
480-
this.#isSpecialCharacterSymbol(char)
481-
);
482-
}
483-
484-
#isSpecialCharacterSymbol(char) {
485-
// We deliberately do not include "# \ + -"" in this list
486-
// these are used for comments/reasons explanations, pragma and escape characters
487-
// whitespace is not included because it is handled separately
488-
return StringPrototypeIncludes('!"$%&\'()*,./:;<=>?@[]^_`{|}~', char);
479+
if (typeof char !== 'string') return false;
480+
const charCode = StringPrototypeCodePointAt(char);
481+
482+
if (isZeroWidthCodePoint(charCode)) return false;
483+
if (this.#isWhitespaceSymbol(char)) return false;
484+
const MAX_ASCII_CHAR_CODE = 0b111_1111; // ASCII is 7-bit long
485+
// Allow all non-latin characters.
486+
if (charCode > MAX_ASCII_CHAR_CODE) return true;
487+
const ZERO = 48; // 0
488+
const NINE = 58; // 9
489+
// Disallow numeric values
490+
if (charCode >= ZERO && char <= NINE) return false;
491+
492+
// Disallow characters with special meaning in TAP
493+
const HASH = 35; // #
494+
const BACKSLASH = 92; // \
495+
const PLUS = 43; // +
496+
const DASH = 45; // -
497+
498+
// Disallow characters with special meaning in TAP
499+
return charCode !== HASH && charCode !== BACKSLASH &&
500+
charCode !== PLUS && charCode !== DASH;
489501
}
490502

491503
#isWhitespaceSymbol(char) {

lib/internal/util/inspect.js

+13-11
Original file line numberDiff line numberDiff line change
@@ -2295,6 +2295,18 @@ function formatWithOptionsInternal(inspectOptions, args) {
22952295
return str;
22962296
}
22972297

2298+
/**
 * Reports whether a Unicode code point falls in one of the ranges this
 * module treats as zero-width: control codes, combining marks, invisible
 * modifier characters and variation selectors.
 * @param {number} code Code point to classify.
 * @returns {boolean} `true` when `code` lies in any of the ranges below.
 */
function isZeroWidthCodePoint(code) {
2299+
return code <= 0x1F || // C0 control codes
2300+
(code >= 0x7F && code <= 0x9F) || // DEL (0x7F) and C1 control codes
2301+
(code >= 0x300 && code <= 0x36F) || // Combining Diacritical Marks
2302+
(code >= 0x200B && code <= 0x200F) || // Modifying Invisible Characters
2303+
// Combining Diacritical Marks for Symbols
2304+
(code >= 0x20D0 && code <= 0x20FF) ||
2305+
(code >= 0xFE00 && code <= 0xFE0F) || // Variation Selectors
2306+
(code >= 0xFE20 && code <= 0xFE2F) || // Combining Half Marks
2307+
(code >= 0xE0100 && code <= 0xE01EF); // Variation Selectors
2308+
}
2309+
22982310
if (internalBinding('config').hasIntl) {
22992311
const icu = internalBinding('icu');
23002312
// icu.getStringWidth(string, ambiguousAsFullWidth, expandEmojiSequence)
@@ -2384,17 +2396,6 @@ if (internalBinding('config').hasIntl) {
23842396
);
23852397
};
23862398

2387-
const isZeroWidthCodePoint = (code) => {
2388-
return code <= 0x1F || // C0 control codes
2389-
(code >= 0x7F && code <= 0x9F) || // C1 control codes
2390-
(code >= 0x300 && code <= 0x36F) || // Combining Diacritical Marks
2391-
(code >= 0x200B && code <= 0x200F) || // Modifying Invisible Characters
2392-
// Combining Diacritical Marks for Symbols
2393-
(code >= 0x20D0 && code <= 0x20FF) ||
2394-
(code >= 0xFE00 && code <= 0xFE0F) || // Variation Selectors
2395-
(code >= 0xFE20 && code <= 0xFE2F) || // Combining Half Marks
2396-
(code >= 0xE0100 && code <= 0xE01EF); // Variation Selectors
2397-
};
23982399
}
23992400

24002401
/**
@@ -2414,4 +2415,5 @@ module.exports = {
24142415
formatWithOptions,
24152416
getStringWidth,
24162417
stripVTControlCharacters,
2418+
isZeroWidthCodePoint,
24172419
};

test/parallel/test-runner-tap-lexer.js

+36
Original file line numberDiff line numberDiff line change
@@ -444,3 +444,39 @@ ok 1
444444
assert.strictEqual(tokens[index].value, token.value);
445445
});
446446
}
447+
448+
// Test isLiteralSymbol method
449+
{
450+
// Lex an `ok` test line whose description mixes ASCII with Arabic, CJK and
// emoji characters.
const tokens = TAPLexer('ok 1 - description أتث讲演講👍🔥');
451+
452+
// Expected token stream: the whole non-ASCII run is one LITERAL token.
[
453+
{ kind: TokenKind.TAP_TEST_OK, value: 'ok' },
454+
{ kind: TokenKind.WHITESPACE, value: ' ' },
455+
{ kind: TokenKind.NUMERIC, value: '1' },
456+
{ kind: TokenKind.WHITESPACE, value: ' ' },
457+
{ kind: TokenKind.DASH, value: '-' },
458+
{ kind: TokenKind.WHITESPACE, value: ' ' },
459+
{ kind: TokenKind.LITERAL, value: 'description' },
460+
{ kind: TokenKind.WHITESPACE, value: ' ' },
461+
{ kind: TokenKind.LITERAL, value: 'أتث讲演講👍🔥' },
462+
{ kind: TokenKind.EOL, value: '' },
463+
].forEach((token, index) => {
464+
// Compare kind and value of each produced token, position by position.
assert.strictEqual(tokens[index].kind, token.kind);
465+
assert.strictEqual(tokens[index].value, token.value);
466+
});
467+
}
468+
469+
{
470+
// Lex a `#` comment line that ends with Arabic, CJK and emoji characters.
const tokens = TAPLexer('# comment أتث讲演講👍🔥');
471+
// Expected token stream: the non-ASCII run after the comment marker is one
// LITERAL token.
[
472+
{ kind: TokenKind.COMMENT, value: '#' },
473+
{ kind: TokenKind.WHITESPACE, value: ' ' },
474+
{ kind: TokenKind.LITERAL, value: 'comment' },
475+
{ kind: TokenKind.WHITESPACE, value: ' ' },
476+
{ kind: TokenKind.LITERAL, value: 'أتث讲演講👍🔥' },
477+
{ kind: TokenKind.EOL, value: '' },
478+
].forEach((token, index) => {
479+
// Compare kind and value of each produced token, position by position.
assert.strictEqual(tokens[index].kind, token.kind);
480+
assert.strictEqual(tokens[index].value, token.value);
481+
});
482+
}

0 commit comments

Comments
 (0)