Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 23 additions & 9 deletions unicodetools/data/ucd/dev/IndicSyllabicCategory.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# IndicSyllabicCategory-16.0.0.txt
# Date: 2023-11-10, 22:06:18 GMT
# Date: 2023-11-13, 19:36:00 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
Expand Down Expand Up @@ -262,14 +262,15 @@ A9B3 ; Nukta # Mn JAVANESE SIGN CECAK TELU

# Indic_Syllabic_Category=Virama

# Virama (killing of inherent vowel in consonant sequence
# or consonant stacker)
# Virama (kills inherent vowel of consonant; may act as a Pure_Killer
# or Invisible_Stacker depending on context)
# Only includes characters that can act both as visible killer viramas
# and consonant stackers. Separate property values exist for characters
# that can only act as pure killers or only as consonant stackers.
# that can only act as pure killers, only as reordering killers, or only
# as consonant stackers.

# [Derivation: (ccc=9) - (InSC=Pure_Killer) - (InSC=Invisible_Stacker)
# - (InSC=Number_Joiner) - 2D7F]
# - (InSC=Reordering_Killer) - (InSC=Number_Joiner) - 2D7F]

094D ; Virama # Mn DEVANAGARI SIGN VIRAMA
09CD ; Virama # Mn BENGALI SIGN VIRAMA
Expand Down Expand Up @@ -303,8 +304,9 @@ A9C0 ; Virama # Mc JAVANESE PANGKON

# Indic_Syllabic_Category=Pure_Killer

# Pure killer (killing of inherent vowel in consonant sequence,
# with no consonant stacking behavior)
# Pure killer (kills inherent vowel of consonant; always visible;
# has no conjunct formation, consonant stacking, or reordering
# behavior)

# [Not derivable]

Expand All @@ -320,7 +322,6 @@ A9C0 ; Virama # Mc JAVANESE PANGKON
17D1 ; Pure_Killer # Mn KHMER SIGN VIRIAM
1A7A ; Pure_Killer # Mn TAI THAM SIGN RA HAAM
1BAA ; Pure_Killer # Mc SUNDANESE SIGN PAMAAEH
1BF2..1BF3 ; Pure_Killer # Mc [2] BATAK PANGOLAT..BATAK PANONGONAN
A82C ; Pure_Killer # Mn SYLOTI NAGRI SIGN ALTERNATE HASANTA
A953 ; Pure_Killer # Mc REJANG VIRAMA
ABED ; Pure_Killer # Mn MEETEI MAYEK APUN IYEK
Expand All @@ -339,9 +340,22 @@ ABED ; Pure_Killer # Mn MEETEI MAYEK APUN IYEK

# ================================================

# Indic_Syllabic_Category=Reordering_Killer

# Reordering killer (kills inherent vowel of consonant; always visible;
# may cause consonant reordering)

# [Not derivable]

1BF2..1BF3 ; Reordering_Killer # Mc [2] BATAK PANGOLAT..BATAK PANONGONAN

# ================================================

# Indic_Syllabic_Category=Invisible_Stacker

# Invisible stacker (invisible consonant stacker virama).
# Invisible stacker (usually kills inherent vowel of consonant; is not visible
# by itself; causes conjunct formation or consonant
# stacking)
#
# Note that in some scripts, such as Kharoshthi and Masaram Gondi, an invisible
# stacker may have a second function, changing the shape and/or location of the
Expand Down
1 change: 1 addition & 0 deletions unicodetools/data/ucd/dev/PropertyValueAliases.txt
Original file line number Diff line number Diff line change
Expand Up @@ -919,6 +919,7 @@ InSC; Number_Joiner ; Number_Joiner
InSC; Other ; Other
InSC; Pure_Killer ; Pure_Killer
InSC; Register_Shifter ; Register_Shifter
InSC; Reordering_Killer ; Reordering_Killer
InSC; Syllable_Modifier ; Syllable_Modifier
InSC; Tone_Letter ; Tone_Letter
InSC; Tone_Mark ; Tone_Mark
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1096,6 +1096,7 @@ public enum Indic_Syllabic_Category_Values implements Named {
Other("Other"),
Pure_Killer("Pure_Killer"),
Register_Shifter("Register_Shifter"),
Reordering_Killer("Reordering_Killer"),
Syllable_Modifier("Syllable_Modifier"),
Tone_Letter("Tone_Letter"),
Tone_Mark("Tone_Mark"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1119,21 +1119,30 @@ Value: Nukta

# [Not derivable]
Value: Virama
# Virama (killing of inherent vowel in consonant sequence
# or consonant stacker)
# Virama (kills inherent vowel of consonant; may act as a Pure_Killer
# or Invisible_Stacker depending on context)
# Only includes characters that can act both as visible killer viramas
# and consonant stackers. Separate property values exist for characters
# that can only act as pure killers or only as consonant stackers.
# that can only act as pure killers, only as reordering killers, or only
# as consonant stackers.

# [Derivation: (ccc=9) - (InSC=Pure_Killer) - (InSC=Invisible_Stacker)
# - (InSC=Number_Joiner) - 2D7F]
# - (InSC=Reordering_Killer) - (InSC=Number_Joiner) - 2D7F]
Value: Pure_Killer
# Pure killer (killing of inherent vowel in consonant sequence,
# with no consonant stacking behavior)
# Pure killer (kills inherent vowel of consonant; always visible;
# has no conjunct formation, consonant stacking, or reordering
# behavior)

# [Not derivable]
Value: Reordering_Killer
# Reordering killer (kills inherent vowel of consonant; always visible;
# may cause consonant reordering)

# [Not derivable]
Value: Invisible_Stacker
# Invisible stacker (invisible consonant stacker virama).
# Invisible stacker (usually kills inherent vowel of consonant; is not visible
# by itself; causes conjunct formation or consonant
# stacking)
#
# Note that in some scripts, such as Kharoshthi and Masaram Gondi, an invisible
# stacker may have a second function, changing the shape and/or location of the
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -571,8 +571,9 @@ Let $OPInclusions = [\u00A1\u00BF\u2E18\U00013258-\U0001325A\U00013286\U00013288
[\p{LB=CL}\p{LB=CP}] ⊃ [\p{GC=Pe} - [\u2309\u230B]]

# See L2/22-086 for an explanation of the special case of Batak.
# (Note that Consensus 177-C43 changed the Batak viramas from InSC=Pure_Killer to Reordering_Killer.)
\p{LB=VI} = [[\p{Indic_Syllabic_Category=Virama}\p{Indic_Syllabic_Category=Invisible_Stacker}] & $BrahmicLineBreaking]
\p{LB=VF} = [\p{Indic_Syllabic_Category=Pure_Killer} & $VFScripts]
\p{LB=VF} = [\p{Indic_Syllabic_Category=Reordering_Killer} & $VFScripts]

# 15.1: Action item UTC-176-A81: change [[:PCM:]-\u070F] lb=AL->NU
\p{LB=CM} = [[\u3035] \p{GC=Mn} \p{GC=Me} \p{GC=Mc} \p{GC=Cc} \p{GC=Cf} -[\U00013437\U00013438\U0001343C-\U0001343F] -\p{LB=SA} -\p{LB=WJ} -\p{LB=ZW} -\p{LB=BA} -\p{LB=LF} -\p{LB=BK} -\p{LB=CR} -\p{LB=NL} -\p{LB=GL} -\p{LB=AL} -\p{LB=ZWJ} - \p{LB=VI} - \p{LB=VF} - \p{LB=NU}]
Expand Down Expand Up @@ -906,4 +907,4 @@ Let $ideohack = [〆 〇 〡-〩]
[\P{InPC=NA}&\p{gc=Mc}] ⊆ \p{InPC=/(Left|Right)/}
[\P{InPC=NA}&\P{InPC=/(Left|Right)/}] ⊆ [\p{gc=Mn}\p{gc=Lo}]
\p{gc=Mn} ⊆ \P{InPC=/(Left|Right)/}
[\p{gc=Lo}&\P{InPC=NA}&\P{InPC=/(Left|Right)/}] ⊆ [\p{InSC=Consonant_Preceding_Repha}\p{InSC=Consonant_Prefixed}]
[\p{gc=Lo}&\P{InPC=NA}&\P{InPC=/(Left|Right)/}] ⊆ [\p{InSC=Consonant_Preceding_Repha}\p{InSC=Consonant_Prefixed}]