@@ -59,67 +59,79 @@ Base.isless(x::Code, y::Code) = isless(UInt8(x), UInt8(y))
59
59
Base. isless (x:: Code , y:: Integer ) = isless (UInt8 (x), y)
60
60
Base. isless (x:: Integer , y:: Code ) = isless (x, UInt8 (y))
61
61
62
- for (nam, val, cat, typ, des) in
63
- ((:Cn , 0 , :NotAssignedChar , :Other , " Other, Not assigned" ),
64
- (:Lu , 1 , :UpperCase , :Upper , " Letter, uppercase" ),
65
- (:Ll , 2 , :LowerCase , :Letter , " Letter, lowercase" ),
66
- (:Lt , 3 , :TitleCase , :Upper , " Letter, titlecase" ),
67
- (:Lm , 4 , :ModifierLetter , :Letter , " Letter, modifier" ),
68
- (:Lo , 5 , :OtherLetter , :Letter , " Letter, other" ),
69
- (:Mn , 6 , :NonSpacingMark , :Mark , " Mark, nonspacing" ),
70
- (:Mc , 7 , :CombiningMark , :Mark , " Mark, spacing combining" ),
71
- (:Me , 8 , :EnclosingMark , :Mark , " Mark, enclosing" ),
72
- (:Nd , 9 , :DecimalDigit , :Number , " Number, decimal digit" ),
73
- (:Nl , 10 , :NumericLetter , :Number , " Number, letter" ),
74
- (:No , 11 , :OtherNumber , :Number , " Number, other" ),
75
- (:Pc , 12 , :ConnectorPunctuation , :Punctuation , " Punctuation, connector" ),
76
- (:Pd , 13 , :DashPunctuation , :Punctuation , " Punctuation, dash" ),
77
- (:Ps , 14 , :OpenPunctuation , :Punctuation , " Punctuation, open" ),
78
- (:Pe , 15 , :ClosePunctuation , :Punctuation , " Punctuation, close" ),
79
- (:Pi , 16 , :InitialQuotePunctuation , :Punctuation , " Punctuation, initial quote" ),
80
- (:Pf , 17 , :FinalQuotePunctuation , :Punctuation , " Punctuation, final quote" ),
81
- (:Po , 18 , :OtherPunctuation , :Punctuation , " Punctuation, other" ),
82
- (:Sm , 19 , :MathSymbol , :Symbol , " Symbol, math" ),
83
- (:Sc , 20 , :CurrencySymbol , :Symbol , " Symbol, currency" ),
84
- (:Sk , 21 , :ModifierSymbol , :Symbol , " Symbol, modifier" ),
85
- (:So , 22 , :OtherSymbol , :Symbol , " Symbol, other" ),
86
- (:Zs , 23 , :SpaceSeparator , :Separator , " Separator, space" ),
87
- (:Zl , 24 , :LineSeparator , :Separator , " Separator, line" ),
88
- (:Zp , 25 , :ParagraphSeparator , :Separator , " Separator, paragraph" ),
89
- (:Cc , 26 , :ControlChar , :Other , " Other, control" ),
90
- (:Cf , 27 , :FormatChar , :Other , " Other, format" ),
91
- (:Cs , 28 , :SurrogateChar , :Other , " Other, surrogate" ),
92
- (:Co , 29 , :PrivateUseChar , :Other , " Other, private use" ))
93
- @eval const global $ nam = $ (Code (val))
94
- @eval abstract $ cat <: $typ
95
- @eval Base. convert (:: Type{Code} , ct:: $cat ) = $ (Code (val))
96
- @eval @doc $ (string (" Unicode Category Code: " ,des)) $ nam
97
- @eval @doc $ (string (" Unicode Category Type: " ,des)) $ cat
62
+ let c2t = DataType[]
63
+ for (nam, val, cat, typ, des) in
64
+ ((:Cn , 0 , :NotAssignedChar , :Other , " Other, Not assigned" ),
65
+ (:Lu , 1 , :UpperCase , :Upper , " Letter, uppercase" ),
66
+ (:Ll , 2 , :LowerCase , :Letter , " Letter, lowercase" ),
67
+ (:Lt , 3 , :TitleCase , :Upper , " Letter, titlecase" ),
68
+ (:Lm , 4 , :ModifierLetter , :Letter , " Letter, modifier" ),
69
+ (:Lo , 5 , :OtherLetter , :Letter , " Letter, other" ),
70
+ (:Mn , 6 , :NonSpacingMark , :Mark , " Mark, nonspacing" ),
71
+ (:Mc , 7 , :CombiningMark , :Mark , " Mark, spacing combining" ),
72
+ (:Me , 8 , :EnclosingMark , :Mark , " Mark, enclosing" ),
73
+ (:Nd , 9 , :DecimalDigit , :Number , " Number, decimal digit" ),
74
+ (:Nl , 10 , :NumericLetter , :Number , " Number, letter" ),
75
+ (:No , 11 , :OtherNumber , :Number , " Number, other" ),
76
+ (:Pc , 12 , :ConnectorPunctuation , :Punctuation , " Punctuation, connector" ),
77
+ (:Pd , 13 , :DashPunctuation , :Punctuation , " Punctuation, dash" ),
78
+ (:Ps , 14 , :OpenPunctuation , :Punctuation , " Punctuation, open" ),
79
+ (:Pe , 15 , :ClosePunctuation , :Punctuation , " Punctuation, close" ),
80
+ (:Pi , 16 , :InitialQuotePunctuation , :Punctuation , " Punctuation, initial quote" ),
81
+ (:Pf , 17 , :FinalQuotePunctuation , :Punctuation , " Punctuation, final quote" ),
82
+ (:Po , 18 , :OtherPunctuation , :Punctuation , " Punctuation, other" ),
83
+ (:Sm , 19 , :MathSymbol , :Symbol , " Symbol, math" ),
84
+ (:Sc , 20 , :CurrencySymbol , :Symbol , " Symbol, currency" ),
85
+ (:Sk , 21 , :ModifierSymbol , :Symbol , " Symbol, modifier" ),
86
+ (:So , 22 , :OtherSymbol , :Symbol , " Symbol, other" ),
87
+ (:Zs , 23 , :SpaceSeparator , :Separator , " Separator, space" ),
88
+ (:Zl , 24 , :LineSeparator , :Separator , " Separator, line" ),
89
+ (:Zp , 25 , :ParagraphSeparator , :Separator , " Separator, paragraph" ),
90
+ (:Cc , 26 , :ControlChar , :Other , " Other, control" ),
91
+ (:Cf , 27 , :FormatChar , :Other , " Other, format" ),
92
+ (:Cs , 28 , :SurrogateChar , :Other , " Other, surrogate" ),
93
+ (:Co , 29 , :PrivateUseChar , :Other , " Other, private use" ))
94
+ @eval const global $ nam = $ (Code (val))
95
+ @eval abstract $ cat <: $typ
96
+ @eval push! ($ c2t, $ cat)
97
+ @eval Base. convert (:: Type{Code} , ct:: $cat ) = $ (Code (val))
98
+ @eval @doc $ (string (" Unicode Category Code: " ,des)) $ nam
99
+ @eval @doc $ (string (" Unicode Category Type: " ,des)) $ cat
100
+ end
101
+ @eval const global code2general = $ c2t
98
102
end
99
103
104
+ #=
100
105
const c2t = [NotAssignedChar, UpperCase, LowerCase, TitleCase, ModifierLetter, OtherLetter,
101
106
NonSpacingMark, CombiningMark, EnclosingMark,
102
107
DecimalDigit, NumericLetter, OtherNumber,
103
108
ConnectorPunctuation, DashPunctuation, OpenPunctuation, ClosePunctuation,
104
109
InitialQuotePunctuation, FinalQuotePunctuation, OtherPunctuation,
105
110
MathSymbol, CurrencySymbol, ModifierSymbol, OtherSymbol,
106
111
SpaceSeparator, LineSeparator, ParagraphSeparator,
107
- ControlChar, FormatChar, SurrogateChar, PrivateUseChar]
112
+ ControlChar, FormatChar, SurrogateChar, PrivateUseChar]
113
+ =#
108
114
109
- Base. convert (:: Type{General} , cat:: Code ) = c2t [Int (cat)+ 1 ]
115
+ Base. convert (:: Type{General} , cat:: Code ) = code2general [Int (cat)+ 1 ]
110
116
111
117
Unicode. charprop (Mask, c) = Mask (1 << Int (charprop (Code, c)))
112
118
113
- const global UpperMask = Mask (1 << Int (Lu) | 1 << Int (Lt))
114
- const global AlphaMask = Mask (1 << Int (Lu) | 1 << Int (Ll) | 1 << Int (Lt) | 1 << Int (Lm) | 1 << Int (Lo))
115
- const global NumberMask = Mask ((1 << Int (Nd) | 1 << Int (Nl) | 1 << Int (No)))
116
- const global AlphaNumericMask = AlphaMask | NumberMask
119
+ Base.& (c:: Code , m:: Mask ) = ((1 << Int (c)) & m) != 0
120
+
121
+ Base.| (x:: Code , y:: Code ) = Mask ((1 << Int (x)) | (1 << Int (y)))
122
+ Base.| (c:: Code , m:: Mask ) = Mask ((1 << Int (c)) | m)
123
+ Base.| (m:: Mask , c:: Code ) = (c | m)
124
+
125
+ @eval const global UpperMask = $ (Lu | Lt)
126
+ @eval const global AlphaMask = $ (Lu | Ll | Lt | Lm | Lo)
127
+ @eval const global NumberMask = $ (Nd | Nl | No)
128
+ @eval const global AlphaNumericMask = AlphaMask | NumberMask
117
129
118
130
let mask = 0 ; for i = Int (Pc): Int (Po) ; mask |= (1 << i) ; end
119
131
@eval const global PunctuationMask = $ (Mask (mask))
120
132
mask = 0 ; for i = Int (Lu): Int (So) ; mask |= (1 << i) ; end
121
133
@eval const global GraphMask = $ (Mask (mask))
122
- @eval const global PrintMask = $ (Mask (mask | ( 1 << Int (Zs))) )
134
+ @eval const global PrintMask = $ (Mask (mask) | Zs )
123
135
end
124
136
125
137
end # module Cat
@@ -129,17 +141,17 @@ importall .Category
129
141
130
142
is_assigned_char (c) = charprop (Category. Code, c) != Category. Cn
131
143
132
- islower (c:: Char ) = charprop (Category. Code, c) == Category. Ll
144
+ islower (c:: Char ) = charprop (Category. Code, c) == Category. Ll
133
145
134
146
# true for Unicode upper and mixed case
135
- isupper (c:: Char ) = ( charprop (Category. Mask , c) & Category. UpperMask) != 0
136
- isalpha (c:: Char ) = ( charprop (Category. Mask , c) & Category. AlphaMask) != 0
137
- isnumber (c:: Char ) = ( charprop (Category. Mask , c) & Category. NumberMask) != 0
138
- isalnum (c:: Char ) = ( charprop (Category. Mask , c) & Category. AlphaNumericMask) != 0
139
- ispunct (c:: Char ) = ( charprop (Category. Mask , c) & Category. PunctuationMask) != 0
140
- isprint (c:: Char ) = ( charprop (Category. Mask , c) & Category. PrintMask) != 0
147
+ isupper (c:: Char ) = charprop (Category. Code , c) & Category. UpperMask
148
+ isalpha (c:: Char ) = charprop (Category. Code , c) & Category. AlphaMask
149
+ isnumber (c:: Char ) = charprop (Category. Code , c) & Category. NumberMask
150
+ isalnum (c:: Char ) = charprop (Category. Code , c) & Category. AlphaNumericMask
151
+ ispunct (c:: Char ) = charprop (Category. Code , c) & Category. PunctuationMask
152
+ isprint (c:: Char ) = charprop (Category. Code , c) & Category. PrintMask
141
153
# true in principle if a printer would use ink
142
- isgraph (c:: Char ) = ( charprop (Category. Mask , c) & Category. GraphMask) != 0
154
+ isgraph (c:: Char ) = charprop (Category. Code , c) & Category. GraphMask
143
155
144
156
isdigit (c:: Char ) = (' 0' <= c <= ' 9' )
145
157
0 commit comments