Skip to content

Commit

Permalink
CLDR-15253 Unicode 15 new scripts & script metadata
Browse files Browse the repository at this point in the history
See #1668
  • Loading branch information
markusicu authored and Squash Bot committed Jan 3, 2022
1 parent 353527c commit 5fde87b
Show file tree
Hide file tree
Showing 13 changed files with 56 additions and 37 deletions.
2 changes: 2 additions & 0 deletions common/main/en.xml
Original file line number Diff line number Diff line change
Expand Up @@ -744,6 +744,7 @@ annotations.
<script type="Jurc">Jurchen</script>
<script type="Kali">Kayah Li</script>
<script type="Kana">Katakana</script>
<script type="Kawi">Kawi</script>
<script type="Khar">Kharoshthi</script>
<script type="Khmr">Khmer</script>
<script type="Khoj">Khojki</script>
Expand Down Expand Up @@ -783,6 +784,7 @@ annotations.
<script type="Mtei">Meitei Mayek</script>
<script type="Mult">Multani</script>
<script type="Mymr">Myanmar</script>
<script type="Nagm">Nag Mundari</script>
<script type="Nand">Nandinagari</script>
<script type="Narb">Old North Arabian</script>
<script type="Nbat">Nabataean</script>
Expand Down
2 changes: 2 additions & 0 deletions common/main/fr.xml
Original file line number Diff line number Diff line change
Expand Up @@ -729,6 +729,7 @@ Warnings: All cp values have U+FE0F characters removed. See /annotationsDerived/
<script type="Jurc" draft="unconfirmed">jurchen</script>
<script type="Kali">kayah li</script>
<script type="Kana">katakana</script>
<script type="Kawi" draft="provisional">kawi</script>
<script type="Khar">kharochthî</script>
<script type="Khmr">khmer</script>
<script type="Khoj" draft="unconfirmed">khodjki</script>
Expand Down Expand Up @@ -768,6 +769,7 @@ Warnings: All cp values have U+FE0F characters removed. See /annotationsDerived/
<script type="Mtei">meitei mayek</script>
<script type="Mult" draft="unconfirmed">multani</script>
<script type="Mymr">birman</script>
<script type="Nagm" draft="provisional">nag mundari</script>
<script type="Nand">nandinagari</script>
<script type="Narb" draft="unconfirmed">nord-arabique</script>
<script type="Nbat" draft="unconfirmed">nabatéen</script>
Expand Down
12 changes: 7 additions & 5 deletions common/properties/scriptMetadata.txt
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ Cham; 33; AA00; VN; 1; LIMITED_USE; NO; NO; YES; NO; NO
Cher; 33; 13C4; US; 2; LIMITED_USE; NO; NO; NO; NO; YES
Chrs; 33; 10FBF; UZ; 1; EXCLUSION; YES; NO; YES; NO; NO
Copt; 33; 03E2; EG; 1; EXCLUSION; NO; NO; MIN; NO; YES
Cpmn; 33; 12FE5; CY; 2; EXCLUSION; NO; NO; NO; YES; NO # provisional data for future Unicode 14.0 script
Cpmn; 33; 12FE5; CY; 2; EXCLUSION; NO; NO; NO; YES; NO
Cprt; 33; 10800; CY; 1; EXCLUSION; YES; NO; NO; NO; NO
Diak; 33; 1190C; MV; 1; EXCLUSION; NO; NO; YES; YES; NO
Dogr; 33; 1180B; IN; 1; EXCLUSION; NO; NO; YES; NO; NO
Expand All @@ -137,6 +137,7 @@ Hung; 33; 10CA1; HU; 1; EXCLUSION; YES; NO; NO; NO; YES
Ital; 33; 10300; IT; 1; EXCLUSION; NO; NO; NO; NO; NO
Java; 33; A984; ID; 1; LIMITED_USE; NO; NO; YES; NO; NO
Kali; 33; A90A; MM; 1; LIMITED_USE; NO; NO; MIN; NO; NO
Kawi; 33; 11F1B; ID; 1; EXCLUSION; NO; YES; YES; NO; NO # provisional data for future Unicode 15.0 script
Khar; 33; 10A00; PK; 1; EXCLUSION; YES; NO; YES; NO; NO
Khoj; 33; 11208; IN; 1; EXCLUSION; NO; NO; NO; NO; NO
Kits; 33; 18C65; CN; 2; EXCLUSION; NO; YES; NO; YES; NO
Expand All @@ -163,6 +164,7 @@ Mong; 33; 1826; MN; 1; EXCLUSION; NO; NO; YES; NO; NO
Mroo; 33; 16A4F; BD; 1; EXCLUSION; NO; NO; NO; NO; NO
Mtei; 33; ABC0; IN; 1; LIMITED_USE; NO; NO; YES; NO; NO
Mult; 33; 1128F; PK; 1; EXCLUSION; NO; NO; NO; NO; NO
Nagm; 33; 1E4E6; IN; 1; EXCLUSION; NO; NO; NO; NO; NO # provisional data for future Unicode 15.0 script
Nand; 33; 119CE; IN; 1; EXCLUSION; NO; NO; YES; NO; NO
Narb; 33; 10A95; SA; 1; EXCLUSION; YES; NO; NO; NO; NO
Nbat; 33; 10896; JO; 1; EXCLUSION; YES; NO; NO; NO; NO
Expand All @@ -174,7 +176,7 @@ Olck; 33; 1C5A; IN; 1; LIMITED_USE; NO; NO; NO; NO; NO
Orkh; 33; 10C00; MN; 1; EXCLUSION; YES; NO; NO; NO; NO
Osge; 33; 104B5; US; 1; LIMITED_USE; NO; NO; NO; NO; YES
Osma; 33; 10480; SO; 1; EXCLUSION; NO; NO; NO; NO; NO
Ougr; 33; 10F7C; 143; 1; EXCLUSION; YES; NO; YES; NO; NO # provisional data for future Unicode 14.0 script
Ougr; 33; 10F7C; 143; 1; EXCLUSION; YES; NO; YES; NO; NO
Palm; 33; 10873; SY; 1; EXCLUSION; YES; NO; NO; NO; NO
Pauc; 33; 11AC0; MM; 1; EXCLUSION; NO; NO; NO; NO; NO
Perm; 33; 1036B; RU; 1; EXCLUSION; NO; NO; NO; NO; NO
Expand Down Expand Up @@ -211,11 +213,11 @@ Tavt; 33; AA80; VN; 1; LIMITED_USE; NO; YES; YES; NO; NO
Tfng; 33; 2D30; MA; 1; LIMITED_USE; NO; NO; NO; NO; NO
Tglg; 33; 1703; PH; 1; EXCLUSION; NO; NO; MIN; NO; NO
Tirh; 33; 11484; IN; 1; EXCLUSION; NO; NO; NO; NO; NO
Tnsa; 33; 16ABC; IN; 1; EXCLUSION; NO; NO; NO; NO; NO # provisional data for future Unicode 14.0 script
Toto; 33; 1E290; IN; 1; EXCLUSION; NO; NO; NO; NO; NO # provisional data for future Unicode 14.0 script
Tnsa; 33; 16ABC; IN; 1; EXCLUSION; NO; NO; NO; NO; NO
Toto; 33; 1E290; IN; 1; EXCLUSION; NO; NO; NO; NO; NO
Ugar; 33; 10380; SY; 1; EXCLUSION; NO; NO; NO; NO; NO
Vaii; 33; A549; LR; 2; LIMITED_USE; NO; NO; NO; YES; NO
Vith; 33; 10582; AL; 1; EXCLUSION; NO; NO; NO; NO; YES # provisional data for future Unicode 14.0 script
Vith; 33; 10582; AL; 1; EXCLUSION; NO; NO; NO; NO; YES
Wara; 33; 118B4; IN; 1; EXCLUSION; NO; NO; NO; NO; YES
Wcho; 33; 1E2E1; IN; 1; LIMITED_USE; NO; NO; NO; NO; NO
Xpeo; 33; 103A0; IR; 1; EXCLUSION; NO; NO; NO; NO; NO
Expand Down
2 changes: 1 addition & 1 deletion common/supplemental/coverageLevels.xml
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ For terms of use, see http://www.unicode.org/copyright.html
<coverageVariable key="%script40" value="(Latn|Hans|Hant|Cyrl|Arab)"/>
<coverageVariable key="%script60" value="(Jpan|Kore)"/>
<coverageVariable key="%script80" value="(Armn|Beng|Bopo|Brai|Deva|Ethi|Geor|Grek|Gujr|Guru|Hani|Hang|Hebr|Hira|Knda|Kana|Khmr|Laoo|Mlym|Mong|Mymr|Orya|Sinh|Taml|Telu|Thaa|Thai|Tibt|Hanb|Hrkt|Jamo|Jpan|Kore|Zmth|Zsye|Zsym|Zxxx|Zyyy|Zzzz)"/>
<coverageVariable key="%script100" value="(Afak|Aghb|Ahom|Armi|Avst|Bali|Bamu|Bass|Batk|Blis|Brah|Bugi|Buhd|Cakm|Cans|Cari|Cham|Cher|Chrs|Cirt|Copt|Cpmn|Cprt|Cyrs|Diak|Dogr|Dsrt|Dupl|Egy[dhp]|Elba|Elym|Geok|Glag|Gong|Gonm|Goth|Gran|Hatr|Hano|Hluw|Hmng|Hmnp|Hrkt|Hung|Inds|Ital|Java|Jurc|Kali|Khar|Khoj|Kits|Kpel|Kthi|Lana|Lat[fg]|Lepc|Limb|Lin[ab]|Lisu|Loma|Ly[cd]i|Mahj|Maka|Man[di]|Maya|Medf|Mend|Mer[co]|Modi|Moon|Mroo|Mtei|Mult|Nand|Narb|Nbat|Nkgb|Nkoo|Nshu|Ogam|Olck|Orkh|Osma|Ougr|Palm|Pauc|Perm|Phag|Phl[ipv]|Phnx|Plrd|Prti|Rjng|Rohg|Roro|Runr|Samr|Sar[ab]|Saur|Sgnw|Shaw|Shrd|Sidd|Sind|Sogd|Sogo|Sora|Soyo|Sund|Sylo|Syr[cejn]|Tagb|Takr|Tal[eu]|Tang|Tavt|Teng|Tfng|Tglg|Tirh|Tnsa|Toto|Ugar|Vaii|Visp|Vith|Wara|Wcho|Wole|Xpeo|Xsux|Yezi|Yiii|Zanb|Zinh|Zmth)"/>
<coverageVariable key="%script100" value="(Afak|Aghb|Ahom|Armi|Avst|Bali|Bamu|Bass|Batk|Blis|Brah|Bugi|Buhd|Cakm|Cans|Cari|Cham|Cher|Chrs|Cirt|Copt|Cpmn|Cprt|Cyrs|Diak|Dogr|Dsrt|Dupl|Egy[dhp]|Elba|Elym|Geok|Glag|Gong|Gonm|Goth|Gran|Hatr|Hano|Hluw|Hmng|Hmnp|Hrkt|Hung|Inds|Ital|Java|Jurc|Kali|Kawi|Khar|Khoj|Kits|Kpel|Kthi|Lana|Lat[fg]|Lepc|Limb|Lin[ab]|Lisu|Loma|Ly[cd]i|Mahj|Maka|Man[di]|Maya|Medf|Mend|Mer[co]|Modi|Moon|Mroo|Mtei|Mult|Nagm|Nand|Narb|Nbat|Nkgb|Nkoo|Nshu|Ogam|Olck|Orkh|Osma|Ougr|Palm|Pauc|Perm|Phag|Phl[ipv]|Phnx|Plrd|Prti|Rjng|Rohg|Roro|Runr|Samr|Sar[ab]|Saur|Sgnw|Shaw|Shrd|Sidd|Sind|Sogd|Sogo|Sora|Soyo|Sund|Sylo|Syr[cejn]|Tagb|Takr|Tal[eu]|Tang|Tavt|Teng|Tfng|Tglg|Tirh|Tnsa|Toto|Ugar|Vaii|Visp|Vith|Wara|Wcho|Wole|Xpeo|Xsux|Yezi|Yiii|Zanb|Zinh|Zmth)"/>
<coverageVariable key="%shortLong" value="(short|long)"/>
<coverageVariable key="%anyAlphaNum" value="([-a-zA-Z0-9]+)"/>
<coverageVariable key="%ssTypes" value="(standard|none)"/>
Expand Down
8 changes: 7 additions & 1 deletion common/supplemental/likelySubtags.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
<!--
Copyright © 1991-2021 Unicode, Inc.
For terms of use, see http://www.unicode.org/copyright.html
Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
SPDX-License-Identifier: Unicode-DFS-2016
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
-->
<!--
Expand Down Expand Up @@ -1122,6 +1122,8 @@ not be patched by hand, as any changes made in that fashion may be lost.
<!--{ Kamba; ?; ? } => { Kamba; Latin; Kenya }-->
<likelySubtag from="kao" to="kao_Latn_ML"/>
<!--{ Xaasongaxango; ?; ? } => { Xaasongaxango; Latin; Mali }-->
<likelySubtag from="kaw" to="kaw_Kawi_ID"/>
<!--{ Kawi; ?; ? } => { Kawi; Kawi; Indonesia }-->
<likelySubtag from="kbd" to="kbd_Cyrl_RU"/>
<!--{ Kabardian; ?; ? } => { Kabardian; Cyrillic; Russia }-->
<likelySubtag from="kbm" to="kbm_Latn_ZZ"/>
Expand Down Expand Up @@ -3500,6 +3502,8 @@ not be patched by hand, as any changes made in that fashion may be lost.
<!--{ ?; Kayah Li; ? } => { Eastern Kayah; Kayah Li; Myanmar (Burma) }-->
<likelySubtag from="und_Kana" to="ja_Kana_JP"/>
<!--{ ?; Katakana; ? } => { Japanese; Katakana; Japan }-->
<likelySubtag from="und_Kawi" to="kaw_Kawi_ID"/>
<!--{ ?; Kawi; ? } => { Kawi; Kawi; Indonesia }-->
<likelySubtag from="und_Khar" to="pra_Khar_PK"/>
<!--{ ?; Kharoshthi; ? } => { Prakrit languages; Kharoshthi; Pakistan }-->
<likelySubtag from="und_Khmr" to="km_Khmr_KH"/>
Expand Down Expand Up @@ -3606,6 +3610,8 @@ not be patched by hand, as any changes made in that fashion may be lost.
<!--{ ?; Myanmar; India } => { Khamti; Myanmar; India }-->
<likelySubtag from="und_Mymr_TH" to="mnw_Mymr_TH"/>
<!--{ ?; Myanmar; Thailand } => { Mon; Myanmar; Thailand }-->
<likelySubtag from="und_Nagm" to="unr_Nagm_IN"/>
<!--{ ?; Nag Mundari; ? } => { Mundari; Nag Mundari; India }-->
<likelySubtag from="und_Nand" to="sa_Nand_IN"/>
<!--{ ?; Nandinagari; ? } => { Sanskrit; Nandinagari; India }-->
<likelySubtag from="und_Narb" to="xna_Narb_SA"/>
Expand Down
9 changes: 4 additions & 5 deletions common/validity/currency.xml
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
<?xml version='1.0' encoding='UTF-8' ?>
<!DOCTYPE supplementalData SYSTEM '../../common/dtd/ldmlSupplemental.dtd'>
<!--
© 1991-2017 Unicode, Inc.
Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
For terms of use, see http://www.unicode.org/copyright.html.
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/).
Copyright © 1991-2021 Unicode, Inc.
For terms of use, see http://www.unicode.org/copyright.html
SPDX-License-Identifier: Unicode-DFS-2016
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
GENERATED DATA — do not manually update!
Generated by tool: GenerateValidityXml
Tool documented on: http://cldr.unicode.org/development/updating-codes/update-validity-xml
Expand Down
9 changes: 4 additions & 5 deletions common/validity/language.xml
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
<?xml version='1.0' encoding='UTF-8' ?>
<!DOCTYPE supplementalData SYSTEM '../../common/dtd/ldmlSupplemental.dtd'>
<!--
© 1991-2017 Unicode, Inc.
Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
For terms of use, see http://www.unicode.org/copyright.html.
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/).
Copyright © 1991-2021 Unicode, Inc.
For terms of use, see http://www.unicode.org/copyright.html
SPDX-License-Identifier: Unicode-DFS-2016
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
GENERATED DATA — do not manually update!
Generated by tool: GenerateValidityXml
Tool documented on: http://cldr.unicode.org/development/updating-codes/update-validity-xml
Expand Down
9 changes: 4 additions & 5 deletions common/validity/region.xml
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
<?xml version='1.0' encoding='UTF-8' ?>
<!DOCTYPE supplementalData SYSTEM '../../common/dtd/ldmlSupplemental.dtd'>
<!--
© 1991-2017 Unicode, Inc.
Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
For terms of use, see http://www.unicode.org/copyright.html.
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/).
Copyright © 1991-2021 Unicode, Inc.
For terms of use, see http://www.unicode.org/copyright.html
SPDX-License-Identifier: Unicode-DFS-2016
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
GENERATED DATA — do not manually update!
Generated by tool: GenerateValidityXml
Tool documented on: http://cldr.unicode.org/development/updating-codes/update-validity-xml
Expand Down
15 changes: 7 additions & 8 deletions common/validity/script.xml
Original file line number Diff line number Diff line change
@@ -1,19 +1,18 @@
<?xml version='1.0' encoding='UTF-8' ?>
<!DOCTYPE supplementalData SYSTEM '../../common/dtd/ldmlSupplemental.dtd'>
<!--
© 1991-2017 Unicode, Inc.
Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
For terms of use, see http://www.unicode.org/copyright.html.
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/).
Copyright © 1991-2021 Unicode, Inc.
For terms of use, see http://www.unicode.org/copyright.html
SPDX-License-Identifier: Unicode-DFS-2016
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
GENERATED DATA — do not manually update!
Generated by tool: GenerateValidityXml
Tool documented on: http://cldr.unicode.org/development/updating-codes/update-validity-xml
-->
<supplementalData>
<version number="$Revision$"/>
<idValidity>
<id type='script' idStatus='regular'> <!-- 165 items -->
<id type='script' idStatus='regular'> <!-- 167 items -->
Adlm Aghb Ahom Arab Armi Armn Avst
Bali Bamu Bass Batk Beng Bhks Bopo Brah~i Bugi Buhd
Cakm Cans Cari Cham Cher Chrs Copt Cpmn Cprt Cyrl
Expand All @@ -23,10 +22,10 @@
Hanb Hang Hani Hano Hans~t Hatr Hebr Hira Hluw Hmng Hmnp Hrkt Hung
Ital
Jamo Java Jpan
Kali Kana Khar Khmr Khoj Kits Knda Kore Kthi
Kali Kana Kawi Khar Khmr Khoj Kits Knda Kore Kthi
Lana Laoo Latn Lepc Limb Lina~b Lisu Lyci Lydi
Mahj Maka Mand Mani Marc Medf Mend Merc Mero Mlym Modi Mong Mroo Mtei Mult Mymr
Nand Narb Nbat Newa Nkoo Nshu
Nagm Nand Narb Nbat Newa Nkoo Nshu
Ogam Olck Orkh Orya Osge Osma Ougr
Palm Pauc Perm Phag Phli Phlp Phnx Plrd Prti
Rjng Rohg Runr
Expand Down
9 changes: 4 additions & 5 deletions common/validity/variant.xml
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
<?xml version='1.0' encoding='UTF-8' ?>
<!DOCTYPE supplementalData SYSTEM '../../common/dtd/ldmlSupplemental.dtd'>
<!--
© 1991-2017 Unicode, Inc.
Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
For terms of use, see http://www.unicode.org/copyright.html.
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/).
Copyright © 1991-2021 Unicode, Inc.
For terms of use, see http://www.unicode.org/copyright.html
SPDX-License-Identifier: Unicode-DFS-2016
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
GENERATED DATA — do not manually update!
Generated by tool: GenerateValidityXml
Tool documented on: http://cldr.unicode.org/development/updating-codes/update-validity-xml
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ public class ScriptMetadata {
private static final int MAX_RANK = 33;
private static final String DATA_FILE = "/org/unicode/cldr/util/data/Script_Metadata.csv";
private static final VersionInfo UNICODE_VERSION = VersionInfo.getInstance(
CldrUtility.getProperty("SCRIPT_UNICODE_VERSION", "14"));
CldrUtility.getProperty("SCRIPT_UNICODE_VERSION", "15"));

// To get the data, go to the Script MetaData spreadsheet
// Download As Comma Separated Items into DATA_FILE
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -161,4 +161,6 @@ WR,Name,Script_Code,Age,Size,Sample,Sample_Code,Origin Country,~Density,Likely L
159,Old_Uyghur,Ougr,14.0,0,𐽼,10F7C,Central Asia,1,Old Uyghur,oui,Exclusion,Yes,no,Yes,no,no
160,Tangsa,Tnsa,14.0,0,𖪼,16ABC,India,1,Tangsa,nst,Exclusion,no,no,no,no,no
161,Toto,Toto,14.0,0,𞊐,1E290,India,1,Toto,txo,Exclusion,no,no,no,no,no
162,Vithkuqi,Vith,14.0,0,𐖂,10582,Albania,1,Albanian,sq,Exclusion,no,no,no,no,Yes
162,Vithkuqi,Vith,14.0,0,𐖂,10582,Albania,1,Albanian,sq,Exclusion,no,no,no,no,Yes
163,Kawi,Kawi,15.0,0,𑼛,11F1B,Indonesia,1,Kawi,kaw,Exclusion,no,yes,Yes,no,no
164,Nag Mundari,Nagm,15.0,0,𞓦,1E4E6,India,1,Mundari,unr,Exclusion,no,no,no,no,no
Original file line number Diff line number Diff line change
Expand Up @@ -44793,6 +44793,11 @@ Description: Katakana
Added: 2005-10-16
%%
Type: script
Subtag: Kawi
Description: Kawi
Added: 2021-12-22
%%
Type: script
Subtag: Khar
Description: Kharoshthi
Added: 2005-10-16
Expand Down Expand Up @@ -45012,6 +45017,11 @@ Description: Burmese
Added: 2005-10-16
%%
Type: script
Subtag: Nagm
Description: Nag Mundari
Added: 2021-12-22
%%
Type: script
Subtag: Nand
Description: Nandinagari
Added: 2018-10-28
Expand Down

0 comments on commit 5fde87b

Please sign in to comment.