Skip to content

Commit

Permalink
POC - Unicode chars in identifiers (#317)
Browse files Browse the repository at this point in the history
* poc

* Fix validator

* Fix for new reference

---------

Co-authored-by: Fernando Dodino <[email protected]>
  • Loading branch information
2 people authored and npasserini committed Dec 29, 2024
1 parent 443e8b5 commit d3a7eda
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 4 deletions.
4 changes: 2 additions & 2 deletions src/parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -202,9 +202,9 @@ export const Import: Parser<ImportNode> = node(ImportNode)(() =>
// COMMON
// ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────

export const name: Parser<Name> = lazy('identifier', () => regex(/[^\W\d]\w*/))
/**
 * Parser for a plain identifier: one Unicode letter or underscore, followed by
 * any run of Unicode letters, Unicode numbers or underscores.
 */
export const name: Parser<Name> = lazy('identifier', () => {
  const identifierPattern = /^[\p{L}_][\p{L}\p{N}_]*/u
  return regex(identifierPattern)
})

export const packageName: Parser<Name> = lazy('package identifier', () => regex(/[^\W\d][\w-]*/))
/**
 * Parser for a package identifier: one Unicode letter or underscore, followed
 * by any run of Unicode letters, Unicode numbers, underscores or hyphens.
 * The first character can never be a digit or a hyphen.
 */
export const packageName: Parser<Name> = lazy('package identifier', () =>
  // The leading class is `[\p{L}_]` rather than `[^\W\d]`: the latter only
  // admits ASCII letters and `_`, so this parser used to reject package names
  // beginning with a non-ASCII letter (e.g. `ñandú-lib`).
  regex(/[\p{L}_][\p{L}\p{N}_-]*/u)
)

export const FullyQualifiedReference: Parser<ReferenceNode<any>> = node(ReferenceNode)(() =>
obj({ name: packageName.or(name).sepBy1(key('.')).tieWith('.') })
Expand Down
4 changes: 2 additions & 2 deletions src/validator/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,9 @@ export const nameMatches = (regex: RegExp): (node: Node & { name: string }, code
sourceMapForNodeName,
)

export const nameShouldBeginWithUppercase = nameMatches(/^[A-Z]/)
/**
 * Validation built with `nameMatches`: the node's name must begin with an
 * uppercase letter.
 *
 * The check uses the Unicode `Lu` (uppercase letter) category instead of an
 * enumerated list, so every cased script is covered (`Ö`, `Ç`, `Ü`, `Ω`, ...)
 * and not only ASCII plus a handful of Spanish accented capitals.
 */
export const nameShouldBeginWithUppercase = nameMatches(/^\p{Lu}/u)

export const nameShouldBeginWithLowercase = nameMatches(/^[a-z_<]/)
/**
 * Validation built with `nameMatches`: the node's name must begin with a
 * lowercase letter, an underscore or `<`.
 *
 * The check uses the Unicode `Ll` (lowercase letter) category instead of an
 * enumerated list, so every cased script is covered (`ö`, `ç`, `ü`, `ω`, ...)
 * and not only ASCII plus a handful of Spanish accented letters.
 */
export const nameShouldBeginWithLowercase = nameMatches(/^[\p{Ll}_<]/u)

export const nameShouldNotBeKeyword = error<Parameter | Variable | Field | Method>(node =>
!RESERVED_WORDS.includes(node.name || ''),
Expand Down
23 changes: 23 additions & 0 deletions test/parser.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,10 @@ describe('Wollok parser', () => {
'_foo123'.should.be.be.parsedBy(parser).into('_foo123')
})

// Identifiers may mix ASCII letters, digits, underscores and non-ASCII letters.
it('should parse names that contains unicode chars', () => {
  const source = '_foö123_and_bár'
  // Single `.be`: the chain word has no effect on the assertion, so the
  // duplicated `.be.be` was only a typo.
  source.should.be.parsedBy(parser).into('_foö123_and_bár')
})

// Whitespace is not allowed inside a name, so the input must be rejected.
it('should not parse names with spaces', () => {
  const source = 'foo bar'
  source.should.not.be.parsedBy(parser)
})
Expand All @@ -381,6 +385,9 @@ describe('Wollok parser', () => {
'"foo"'.should.not.be.parsedBy(parser)
})

// A double-quoted literal is not a name, even when its content holds
// non-ASCII letters.
it('should not parse strings containing unicode as names', () => {
  const source = '"foö"'
  source.should.not.be.parsedBy(parser)
})
})


Expand Down Expand Up @@ -1925,6 +1932,10 @@ class c {}`
'var v'.should.be.parsedBy(parser).into(new Variable({ name: 'v', isConstant: false })).and.be.tracedTo(0, 5)
})

// A variable whose whole identifier is a single non-ASCII letter.
it('should parse var declaration with non-ascii caracter in identifier', () => {
  const source = 'var ñ'
  const expected = new Variable({ name: 'ñ', isConstant: false })
  source.should.be.parsedBy(parser).into(expected).and.be.tracedTo(0, 5)
})

it('should parse var asignation', () => {
'var v = 5'.should.be.parsedBy(parser).into(
new Variable({
Expand Down Expand Up @@ -2251,6 +2262,18 @@ class c {}`
)
})

// A reference may begin with (and consist solely of) a non-ASCII letter.
it('should parse references starting with unicode letter', () => {
  const source = 'ñ'
  const expected = new Reference({ name: 'ñ' })
  source.should.be.parsedBy(parser).into(expected).and.be.tracedTo(0, 1)
})

// Non-ASCII letters are also accepted in the middle of a reference name.
it('should parse references containing unicode letter', () => {
  const source = 'some_ñandu'
  const expected = new Reference({ name: 'some_ñandu' })
  source.should.be.parsedBy(parser).into(expected).and.be.tracedTo(0, 10)
})

// A leading digit still disqualifies the name, whatever letters follow it.
it('should not parse references starting with numbers that contain unicode letters', () => {
  const source = '4ñandu'
  source.should.not.be.parsedBy(parser)
})

// Whitespace is not allowed inside a reference, so the input must be rejected.
it('should not parse references with spaces', () => {
  const source = 'foo bar'
  source.should.not.be.parsedBy(parser)
})
Expand Down

0 comments on commit d3a7eda

Please sign in to comment.