From d3a7edaac05613ccc53070a72431838664d7e756 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicol=C3=A1s=20Migueles?=
Date: Thu, 19 Dec 2024 22:21:54 -0300
Subject: [PATCH] POC - Unicode chars in identifiers (#317)

* poc

* Fix validator

* Fix for new reference

---------

Co-authored-by: Fernando Dodino
---
Review notes (everything between the '---' separator above and the first
'diff --git' line is commentary and is ignored by `git am`):

 * Layout: this patch had been collapsed onto two physical lines. Line breaks
   were restored from the hunk headers and the diffstat. Leading indentation
   of context and added lines could not be recovered and is reconstructed
   assuming 2-space nesting; verify it against the target tree, or apply with
   `git apply --ignore-whitespace`.
 * Angle-bracketed text (e-mail addresses, and apparently the generic type
   arguments of `Parser`) is missing from this copy and was NOT restored
   here. TODO: confirm against the original commit.
 * src/parser.ts, packageName: the first character class was still ASCII-only
   ([^\W\d]) while `name` already accepted [\p{L}_], so a dashed package name
   starting with a non-ASCII letter (e.g. "ñandu-pkg") was rejected by
   packageName and only partially consumed by the `name` fallback. It now
   uses the same Unicode classes as `name`, plus '-'. Every string matched
   before is still matched.
 * src/validator/index.ts: the casing rules listed a handful of Spanish
   letters by hand, so other letters the parser accepts (e.g. "Über",
   "école") always failed them. They now use the Unicode general categories
   Lu / Ll (requires the `u` flag). '_' and '<' remain accepted by the
   lowercase rule.
 * The `index` blob ids below belong to the original commit and no longer
   describe the edited '+' lines.

 src/parser.ts          |  4 ++--
 src/validator/index.ts |  4 ++--
 test/parser.test.ts    | 23 +++++++++++++++++++++++
 3 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/src/parser.ts b/src/parser.ts
index e5c4f9c1..a36ecfeb 100644
--- a/src/parser.ts
+++ b/src/parser.ts
@@ -202,9 +202,9 @@ export const Import: Parser = node(ImportNode)(() =>
 // COMMON
 // ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────
 
-export const name: Parser = lazy('identifier', () => regex(/[^\W\d]\w*/))
+export const name: Parser = lazy('identifier', () => regex(/^[\p{L}_][\p{L}\p{N}_]*/u))
 
-export const packageName: Parser = lazy('package identifier', () => regex(/[^\W\d][\w-]*/))
+export const packageName: Parser = lazy('package identifier', () => regex(/[\p{L}_][\p{L}\p{N}_-]*/u))
 
 export const FullyQualifiedReference: Parser> = node(ReferenceNode)(() =>
   obj({ name: packageName.or(name).sepBy1(key('.')).tieWith('.') })
diff --git a/src/validator/index.ts b/src/validator/index.ts
index f8e4c44b..a734fa9d 100644
--- a/src/validator/index.ts
+++ b/src/validator/index.ts
@@ -75,9 +75,9 @@ export const nameMatches = (regex: RegExp): (node: Node & { name: string }, code
   sourceMapForNodeName,
 )
 
-export const nameShouldBeginWithUppercase = nameMatches(/^[A-Z]/)
+export const nameShouldBeginWithUppercase = nameMatches(/^\p{Lu}/u)
 
-export const nameShouldBeginWithLowercase = nameMatches(/^[a-z_<]/)
+export const nameShouldBeginWithLowercase = nameMatches(/^[\p{Ll}_<]/u)
 
 export const nameShouldNotBeKeyword = error(node =>
   !RESERVED_WORDS.includes(node.name || ''),
diff --git a/test/parser.test.ts b/test/parser.test.ts
index 5f4ef979..a645cdf0 100644
--- a/test/parser.test.ts
+++ b/test/parser.test.ts
@@ -365,6 +365,10 @@ describe('Wollok parser', () => {
       '_foo123'.should.be.be.parsedBy(parser).into('_foo123')
     })
 
+    it('should parse names that contains unicode chars', () => {
+      '_foö123_and_bár'.should.be.be.parsedBy(parser).into('_foö123_and_bár')
+    })
+
     it('should not parse names with spaces', () => {
       'foo bar'.should.not.be.parsedBy(parser)
     })
@@ -381,6 +385,9 @@ describe('Wollok parser', () => {
       '"foo"'.should.not.be.parsedBy(parser)
     })
 
+    it('should not parse strings containing unicode as names', () => {
+      '"foö"'.should.not.be.parsedBy(parser)
+    })
   })
 
 
@@ -1925,6 +1932,10 @@ class c {}`
         'var v'.should.be.parsedBy(parser).into(new Variable({ name: 'v', isConstant: false })).and.be.tracedTo(0, 5)
       })
 
+      it('should parse var declaration with non-ascii caracter in identifier', () => {
+        'var ñ'.should.be.parsedBy(parser).into(new Variable({ name: 'ñ', isConstant: false })).and.be.tracedTo(0, 5)
+      })
+
       it('should parse var asignation', () => {
         'var v = 5'.should.be.parsedBy(parser).into(
           new Variable({
@@ -2251,6 +2262,18 @@ class c {}`
         )
       })
 
+      it('should parse references starting with unicode letter', () => {
+        'ñ'.should.be.parsedBy(parser).into(new Reference({ name: 'ñ' })).and.be.tracedTo(0, 1)
+      })
+
+      it('should parse references containing unicode letter', () => {
+        'some_ñandu'.should.be.parsedBy(parser).into(new Reference({ name: 'some_ñandu' })).and.be.tracedTo(0, 10)
+      })
+
+      it('should not parse references starting with numbers that contain unicode letters', () => {
+        '4ñandu'.should.not.be.parsedBy(parser)
+      })
+
       it('should not parse references with spaces', () => {
         'foo bar'.should.not.be.parsedBy(parser)
       })