Skip to content

Commit

Permalink
Added support for UnicodeCombiningMark, fixes google#3639.
Browse files Browse the repository at this point in the history
  • Loading branch information
ctjlewis committed Jul 19, 2020
1 parent 76afa8e commit 5ad0744
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 2 deletions.
2 changes: 2 additions & 0 deletions build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#!/bin/bash
# Invoke Maven with tests skipped (-DskipTests), restricted via -pl to the
# externs, main, and main-shaded project POMs.
mvn -DskipTests \
  -pl externs/pom.xml,pom-main.xml,pom-main-shaded.xml
50 changes: 48 additions & 2 deletions src/com/google/javascript/jscomp/parsing/parser/Scanner.java
Original file line number Diff line number Diff line change
Expand Up @@ -901,8 +901,52 @@ private static boolean isIdentifierStart(char ch) {
| (ch >= 0x03B1 & ch <= 0x03C9); // Greek lowercase letters
}

// Helpers implementing the ECMAScript grammar for IdentifierPart.

/**
 * Returns whether {@code ch} is a UnicodeCombiningMark as defined by ECMAScript 5.1, section 7.6:
 * any character in the Unicode categories "Non-spacing mark (Mn)" or "Combining spacing mark
 * (Mc)".
 *
 * @param ch the UTF-16 code unit to classify
 * @return {@code true} if the general category of {@code ch} is Mn or Mc
 */
private static boolean isCombiningMark(char ch) {
  int category = Character.getType(ch);
  // Both categories are required by the grammar; checking only Mn would reject spacing
  // combining marks such as U+0903 (DEVANAGARI SIGN VISARGA).
  return category == Character.NON_SPACING_MARK
      || category == Character.COMBINING_SPACING_MARK;
}

/**
 * Returns whether {@code ch} is a UnicodeConnectorPunctuation as defined by ECMAScript 5.1,
 * section 7.6: any character in the Unicode category "Connector punctuation (Pc)", for example
 * the underscore or U+203F (UNDERTIE).
 *
 * @param ch the UTF-16 code unit to classify
 * @return {@code true} if the general category of {@code ch} is Pc
 */
private static boolean isConnectorPunctuation(char ch) {
  return Character.getType(ch) == Character.CONNECTOR_PUNCTUATION;
}

/**
 * Argument-less placeholder retained so that any existing call site keeps compiling. It cannot
 * inspect a character and always returns {@code true}; use {@link
 * #isConnectorPunctuation(char)} for a real check.
 */
// TODO (ctjl): Remove once nothing calls the argument-less form.
private static boolean isConnectorPunctuation() {
  return true;
}

/**
 * Returns whether {@code ch} is the Zero Width Joiner (ZWJ, U+200D), which the ECMAScript 5.1
 * IdentifierPart grammar (section 7.6) permits inside identifiers.
 *
 * @param ch the UTF-16 code unit to classify
 * @return {@code true} if {@code ch} is U+200D
 */
private static boolean isZeroWidthJoiner(char ch) {
  return ch == 0x200D;
}

/**
 * Argument-less placeholder retained so that any existing call site keeps compiling. It cannot
 * inspect a character and always returns {@code true}; use {@link #isZeroWidthJoiner(char)} for
 * a real check.
 */
// TODO (ctjl): Remove once nothing calls the argument-less form.
private static boolean isZeroWidthJoiner() {
  return true;
}

/**
 * Returns whether {@code ch} is the Zero Width Non-Joiner (ZWNJ, U+200C), which the ECMAScript
 * 5.1 IdentifierPart grammar (section 7.6) permits inside identifiers.
 *
 * @param ch the UTF-16 code unit to classify
 * @return {@code true} if {@code ch} is U+200C
 */
private static boolean isZeroWidthNonJoiner(char ch) {
  return ch == 0x200C;
}

/**
 * Argument-less placeholder retained so that any existing call site keeps compiling. It cannot
 * inspect a character and always returns {@code true}; use {@link
 * #isZeroWidthNonJoiner(char)} for a real check.
 */
// TODO (ctjl): Remove once nothing calls the argument-less form.
private static boolean isZeroWidthNonJoiner() {
  return true;
}

@SuppressWarnings("ShortCircuitBoolean") // Intentional to minimize branches in this code
private static boolean isIdentifierPart(char ch) {
/**
https://www.ecma-international.org/ecma-262/5.1/#sec-7.6
IdentifierPart ::
IdentifierStart
✓ isIdentifierStart()
UnicodeCombiningMark
✓ isCombiningMark()
UnicodeDigit
✓ Character.isDigit()
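UnicodeConnectorPunctuation
TODO: isConnectorPunctuation() (not yet used in the return expression below)
<ZWNJ>
TODO: isZeroWidthNonJoiner() (not yet used in the return expression below)
<ZWJ>
TODO: isZeroWidthJoiner() (not yet used in the return expression below)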
*/

// Most code is written in pure ASCII, so create a fast path here.
if (ch <= 127) {
return ((ch >= 'A' & ch <= 'Z')
Expand All @@ -913,8 +957,10 @@ private static boolean isIdentifierPart(char ch) {

// Handle non-ASCII characters.
// TODO(tjgq): This should include all characters with the ID_Continue property, plus
// Zero Width Non-Joiner and Zero Width Joiner.
return isIdentifierStart(ch) || Character.isDigit(ch);
// TODO(ctjl): Implement remaining grammar (zero-width joiners, etc.)
return isIdentifierStart(ch)
|| isCombiningMark(ch)
|| Character.isDigit(ch);
}

private Token scanStringLiteral(int beginIndex, char terminator) {
Expand Down
3 changes: 3 additions & 0 deletions test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
var bar = {
: "foo"
};

0 comments on commit 5ad0744

Please sign in to comment.