Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add missing unicode properties #1326

Merged
merged 3 commits into from
Oct 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .eslintrc.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ module.exports = {
},
overrides: [
{
files: ["acorn/src/bin/*.js", "bin/generate-identifier-regex.js"],
files: ["acorn/src/bin/*.js", "bin/generate-identifier-regex.js", "bin/generate-unicode-script-values.js"],
rules: {
"no-console": "off"
}
Expand Down
2 changes: 2 additions & 0 deletions acorn/src/generated/scriptValuesAddedInUnicode.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
// This file was generated by "bin/generate-unicode-script-values.js". Do not modify manually!
export default "Gara Garay Gukh Gurung_Khema Hrkt Katakana_Or_Hiragana Kawi Kirat_Rai Krai Nag_Mundari Nagm Ol_Onal Onao Sunu Sunuwar Todhri Todr Tulu_Tigalari Tutg Unknown Zzzz"
3 changes: 2 additions & 1 deletion acorn/src/unicode-property-data.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import {wordsRegexp} from "./util.js"
import scriptValuesAddedInUnicode from "./generated/scriptValuesAddedInUnicode.js"

// This file contains Unicode properties extracted from the ECMAScript specification.
// The lists are extracted like so:
Expand Down Expand Up @@ -42,7 +43,7 @@ const ecma10ScriptValues = ecma9ScriptValues + " Dogra Dogr Gunjala_Gondi Gong H
const ecma11ScriptValues = ecma10ScriptValues + " Elymaic Elym Nandinagari Nand Nyiakeng_Puachue_Hmong Hmnp Wancho Wcho"
const ecma12ScriptValues = ecma11ScriptValues + " Chorasmian Chrs Diak Dives_Akuru Khitan_Small_Script Kits Yezi Yezidi"
const ecma13ScriptValues = ecma12ScriptValues + " Cypro_Minoan Cpmn Old_Uyghur Ougr Tangsa Tnsa Toto Vithkuqi Vith"
const ecma14ScriptValues = ecma13ScriptValues + " Hrkt Katakana_Or_Hiragana Kawi Nag_Mundari Nagm Unknown Zzzz"
const ecma14ScriptValues = ecma13ScriptValues + " " + scriptValuesAddedInUnicode

const unicodeScriptValues = {
9: ecma9ScriptValues,
Expand Down
59 changes: 59 additions & 0 deletions bin/generate-unicode-script-values.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
"use strict"

const fs = require("fs")
const path = require("path")

// Regenerates "acorn/src/generated/scriptValuesAddedInUnicode.js".
//
// Entry 13 of the property data (the ES2022 level, per the original note here)
// exposes a matcher for the Script values that acorn already lists by hand.
// Later editions defer to the Unicode standard itself, so the generated file
// holds every Script value from the latest Unicode data that this matcher
// does not accept.
import("../acorn/src/unicode-property-data.js").then(async({default: propertyData}) => {
  const reScriptValuesAddedInES = propertyData[13].nonBinary.Script

  // Collect into a Set: the same name can be reported more than once.
  const latestValues = new Set()
  for await (const name of getLatestUnicodeScriptValues()) {
    latestValues.add(name)
  }

  // Keep only the names that are not already hard-coded, in sorted order,
  // as one space-separated string.
  const addedNames = Array.from(latestValues)
    .filter(name => !reScriptValuesAddedInES.test(name))
    .sort()
    .join(" ")

  writeGeneratedFile("scriptValuesAddedInUnicode", `export default ${JSON.stringify(addedNames)}`)

  console.log("Done. The generated files must be committed.")
})

/**
 * Writes a generated module to "acorn/src/generated/<filename>.js", resolved
 * against the current working directory.
 *
 * The file starts with a one-line banner naming this script, followed by
 * `content` and a trailing newline.
 *
 * @param {string} filename base name of the target file, without the ".js" extension
 * @param {string} content source text to place after the banner line
 */
function writeGeneratedFile(filename, content) {
  const scriptName = path.basename(__filename)
  const banner = `// This file was generated by "bin/${scriptName}". Do not modify manually!`
  const target = path.resolve("./acorn/src/generated", `${filename}.js`)
  fs.writeFileSync(target, `${banner}\n${content}\n`, "utf8")
}

/**
 * Yields every value name of the Unicode `Script` (`sc`) property found in
 * the latest PropertyValueAliases.txt published on unicode.org.
 *
 * For each `sc` record the third field is yielded first, then the second
 * field, then any further fields, so a single script normally produces
 * several names. Duplicates are not removed here.
 *
 * @returns {AsyncGenerator<string>} the script value names, in file order
 * @throws {Error} if the download finishes with a non-successful HTTP status
 */
async function * getLatestUnicodeScriptValues() {
  const url = "https://unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt"
  const response = await fetch(url)
  // Without this check an HTTP error page would be parsed as data and the
  // generated file would silently be rewritten with a wrong (usually empty) list.
  if (!response.ok) {
    throw new Error(`Failed to download ${url}: HTTP ${response.status} ${response.statusText}`)
  }
  const lines = (await response.text()).split("\n")
  for (const line of lines) {
    if (!line || line.startsWith("#")) {
      continue
    }
    const [propertyAlias, alias, canonical, ...remaining] = line
      .split("#")[0] // strip trailing comments
      .split(";") // fields are separated by semicolons
      .map((x) => x.trim()) // drop padding (and any "\r" from CRLF line endings)

    if (propertyAlias !== "sc") {
      continue
    }

    for (const name of [canonical, alias, ...remaining]) {
      // Skip missing or empty fields (e.g. after a trailing ";") so that no
      // bogus empty name ends up in the generated list.
      if (name) {
        yield name
      }
    }
  }
}
36 changes: 4 additions & 32 deletions bin/test262.whitelist
Original file line number Diff line number Diff line change
@@ -1,32 +1,4 @@
built-ins/RegExp/property-escapes/generated/Script_-_Garay.js (default)
built-ins/RegExp/property-escapes/generated/Script_-_Garay.js (strict mode)
built-ins/RegExp/property-escapes/generated/Script_-_Gurung_Khema.js (default)
built-ins/RegExp/property-escapes/generated/Script_-_Gurung_Khema.js (strict mode)
built-ins/RegExp/property-escapes/generated/Script_-_Kirat_Rai.js (default)
built-ins/RegExp/property-escapes/generated/Script_-_Kirat_Rai.js (strict mode)
built-ins/RegExp/property-escapes/generated/Script_-_Ol_Onal.js (default)
built-ins/RegExp/property-escapes/generated/Script_-_Ol_Onal.js (strict mode)
built-ins/RegExp/property-escapes/generated/Script_-_Sunuwar.js (default)
built-ins/RegExp/property-escapes/generated/Script_-_Sunuwar.js (strict mode)
built-ins/RegExp/property-escapes/generated/Script_-_Todhri.js (default)
built-ins/RegExp/property-escapes/generated/Script_-_Todhri.js (strict mode)
built-ins/RegExp/property-escapes/generated/Script_-_Tulu_Tigalari.js (default)
built-ins/RegExp/property-escapes/generated/Script_-_Tulu_Tigalari.js (strict mode)
built-ins/RegExp/property-escapes/generated/Script_Extensions_-_Garay.js (default)
built-ins/RegExp/property-escapes/generated/Script_Extensions_-_Garay.js (strict mode)
built-ins/RegExp/property-escapes/generated/Script_Extensions_-_Gurung_Khema.js (default)
built-ins/RegExp/property-escapes/generated/Script_Extensions_-_Gurung_Khema.js (strict mode)
built-ins/RegExp/property-escapes/generated/Script_Extensions_-_Kirat_Rai.js (default)
built-ins/RegExp/property-escapes/generated/Script_Extensions_-_Kirat_Rai.js (strict mode)
built-ins/RegExp/property-escapes/generated/Script_Extensions_-_Ol_Onal.js (default)
built-ins/RegExp/property-escapes/generated/Script_Extensions_-_Ol_Onal.js (strict mode)
built-ins/RegExp/property-escapes/generated/Script_Extensions_-_Sunuwar.js (default)
built-ins/RegExp/property-escapes/generated/Script_Extensions_-_Sunuwar.js (strict mode)
built-ins/RegExp/property-escapes/generated/Script_Extensions_-_Todhri.js (default)
built-ins/RegExp/property-escapes/generated/Script_Extensions_-_Todhri.js (strict mode)
built-ins/RegExp/property-escapes/generated/Script_Extensions_-_Tulu_Tigalari.js (default)
built-ins/RegExp/property-escapes/generated/Script_Extensions_-_Tulu_Tigalari.js (strict mode)
language/import/import-attributes/json-invalid.js (default)
language/import/import-attributes/json-invalid.js (strict mode)
language/import/import-attributes/json-named-bindings.js (default)
language/import/import-attributes/json-named-bindings.js (strict mode)
language/import/import-attributes/json-invalid.js (default)
language/import/import-attributes/json-invalid.js (strict mode)
language/import/import-attributes/json-named-bindings.js (default)
language/import/import-attributes/json-named-bindings.js (strict mode)
4 changes: 3 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@
"build:loose": "rollup -c acorn-loose/rollup.config.mjs",
"build:main": "rollup -c acorn/rollup.config.mjs",
"build:walk": "rollup -c acorn-walk/rollup.config.mjs",
"generate": "node bin/generate-identifier-regex.js",
"generate": "npm run generate:identifier-regex && npm run generate:unicode-script-values",
"generate:identifier-regex": "node bin/generate-identifier-regex.js",
"generate:unicode-script-values": "node bin/generate-unicode-script-values.js",
"lint": "eslint .",
"prepare": "npm run test",
"pretest": "npm run build:main && npm run build:loose",
Expand Down
Loading