Skip to content

Commit fbffc85

Browse files
committed
Update to use submodule
1 parent 100bc61 commit fbffc85

File tree

5 files changed

+89
-85
lines changed

5 files changed

+89
-85
lines changed

base/exports.jl

+3-2
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ export
2222
Serializer,
2323
Docs,
2424
Markdown,
25+
Cat,
2526
Unicode,
2627

2728
# Types
@@ -41,8 +42,8 @@ export
4142
CartesianIndex,
4243
CartesianRange,
4344
Channel,
44-
CharCategory,
45-
CharCategoryCode,
45+
CharCode,
46+
CharType,
4647
Cmd,
4748
Colon,
4849
Complex,

base/unicode.jl

+1-3
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,7 @@ import Base: string, convert, write, length, endof, next, reverseind, lastidx, r
66
lowercase, uppercase, eltype, isless, promote_rule, ==
77

88
export UnicodeError, UTF16String, UTF32String, unsafe_checkstring, checkstring,
9-
utf8, utf16, utf32, containsnul, WString, wstring,
10-
charprop, CharCategoryCode, UnicodeProperty, CharCategory, CatLetter, CatMark, CatNumber,
11-
CatPunctuation, CatSymbol, CatSeparator, CatOther, CatUpper,
9+
utf8, utf16, utf32, containsnul, WString, wstring, charprop, Cat, CharType, CharCode,
1210
is_assigned_char, islower, isupper, isdigit, isalpha, isnumber, isalnum, iscntrl,
1311
ispunct, isspace, isprint, isgraph,
1412
isgraphemebreak, GraphemeIterator, normalize_string, graphemes, charwidth

base/unicode/properties.jl

+81-75
Original file line numberDiff line numberDiff line change
@@ -12,71 +12,78 @@ isvalid(ch::Char) = isvalid(Char, ch)
1212

1313
# Unicode General Category constants
1414

15+
module Cat
16+
export Property, CharType, CharCode
17+
1518
"""Unicode character properties"""
16-
abstract UnicodeProperty
17-
"""Unicode character categories"""
18-
abstract CharCategory <: UnicodeProperty
19-
20-
"""Unicode letter character category"""
21-
abstract CatLetter <: CharCategory
22-
"""Unicode Mark character category"""
23-
abstract CatMark <: CharCategory
24-
"""Unicode Numeric character category"""
25-
abstract CatNumber <: CharCategory
26-
"""Unicode Punctuation character category"""
27-
abstract CatPunctuation <: CharCategory
28-
"""Unicode Symbol character category"""
29-
abstract CatSymbol <: CharCategory
30-
"""Unicode Separator character category"""
31-
abstract CatSeparator <: CharCategory
32-
"""Unicode Other character category"""
33-
abstract CatOther <: CharCategory
19+
abstract Property
20+
21+
"""Unicode character category type"""
22+
abstract CharType <: Property
23+
24+
"""Unicode 'Letter' character category"""
25+
abstract Letter <: CharType
26+
"""Unicode 'Mark' character category"""
27+
abstract Mark <: CharType
28+
"""Unicode 'Number' character category"""
29+
abstract Number <: CharType
30+
"""Unicode 'Punctuation' character category"""
31+
abstract Punctuation <: CharType
32+
"""Unicode 'Symbol' character category"""
33+
abstract Symbol <: CharType
34+
"""Unicode 'Separator' character category"""
35+
abstract Separator <: CharType
36+
"""Unicode 'Other' character category"""
37+
abstract Other <: CharType
3438

3539
"""Unicode uppercase & titlecase letters"""
36-
abstract CatUpper <: CatLetter
40+
abstract Upper <: Letter
41+
42+
"""Unicode character category code (0-29)"""
43+
bitstype 8 CharCode
3744

38-
"""Unicode Character Category Code (0-29)"""
39-
bitstype 8 CharCategoryCode
45+
end # module Cat
46+
import .Cat: Property, CharType, CharCode
4047

41-
convert(::Type{CharCategoryCode}, x::Integer) = reinterpret(CharCategoryCode, x%UInt8)
42-
convert{T<:Integer}(::Type{T}, x::CharCategoryCode) = convert(T, reinterpret(UInt8, x))
43-
promote_rule{T<:Integer}(::Type{T}, ::Type{CharCategoryCode}) = T
44-
isless(x::CharCategoryCode, y::CharCategoryCode) = isless(UInt32(x), UInt32(y))
45-
isless(x::CharCategoryCode, y::Integer) = isless(UInt32(x), y)
46-
isless(x::Integer, y::CharCategoryCode) = isless(x, UInt32(y))
48+
convert(::Type{CharCode}, x::Integer) = reinterpret(CharCode, x%UInt8)
49+
convert{T<:Integer}(::Type{T}, x::CharCode) = convert(T, reinterpret(UInt8, x))
50+
promote_rule{T<:Integer}(::Type{T}, ::Type{CharCode}) = T
51+
isless(x::CharCode, y::CharCode) = isless(UInt8(x), UInt8(y))
52+
isless(x::CharCode, y::Integer) = isless(UInt8(x), y)
53+
isless(x::Integer, y::CharCode) = isless(x, UInt8(y))
4754

4855
for (nam, val, cat, typ, des) in
49-
((:Cn, 0, :NotAssignedChar, CatOther, "Other, Not assigned"),
50-
(:Lu, 1, :UpperCase, CatUpper, "Letter, uppercase"),
51-
(:Ll, 2, :LowerCase, CatLetter, "Letter, lowercase"),
52-
(:Lt, 3, :TitleCase, CatUpper, "Letter, titlecase"),
53-
(:Lm, 4, :ModifierLetter, CatLetter, "Letter, modifier"),
54-
(:Lo, 5, :OtherLetter, CatLetter, "Letter, other"),
55-
(:Mn, 6, :NonSpacingMark, CatMark, "Mark, nonspacing"),
56-
(:Mc, 7, :CombiningMark, CatMark, "Mark, spacing combining"),
57-
(:Me, 8, :EnclosingMark, CatMark, "Mark, enclosing"),
58-
(:Nd, 9, :DecimalDigit, CatNumber, "Number, decimal digit"),
59-
(:Nl, 10, :NumericLetter, CatNumber, "Number, letter"),
60-
(:No, 11, :OtherNumber, CatNumber, "Number, other"),
61-
(:Pc, 12, :ConnectorPunct, CatPunctuation, "Punctuation, connector"),
62-
(:Pd, 13, :DashPunct, CatPunctuation, "Punctuation, dash"),
63-
(:Ps, 14, :OpenPunct, CatPunctuation, "Punctuation, open"),
64-
(:Pe, 15, :ClosePunct, CatPunctuation, "Punctuation, close"),
65-
(:Pi, 16, :BegQuotePunct, CatPunctuation, "Punctuation, initial quote"),
66-
(:Pf, 17, :EndQuotePunct, CatPunctuation, "Punctuation, final quote"),
67-
(:Po, 18, :OtherPunct, CatPunctuation, "Punctuation, other"),
68-
(:Sm, 19, :MathSymbol, CatSymbol, "Symbol, math"),
69-
(:Sc, 20, :CurrencySymbol, CatSymbol, "Symbol, currency"),
70-
(:Sk, 21, :ModifierSymbol, CatSymbol, "Symbol, modifier"),
71-
(:So, 22, :OtherSymbol, CatSymbol, "Symbol, other"),
72-
(:Zs, 23, :SpaceSeparator, CatSeparator, "Separator, space"),
73-
(:Zl, 24, :LineSeparator, CatSeparator, "Separator, line"),
74-
(:Zp, 25, :ParagraphSeparator, CatSeparator, "Separator, paragraph"),
75-
(:Cc, 26, :ControlChar, CatOther, "Other, control"),
76-
(:Cf, 27, :FormatChar, CatOther, "Other, format"),
77-
(:Cs, 28, :SurrogateChar, CatOther, "Other, surrogate"),
78-
(:Co, 29, :PrivateUseChar, CatOther, "Other, private use"))
79-
@eval const global $nam = CharCategoryCode($val)
56+
((:Cn, 0, :NotAssignedChar, Cat.Other, "Other, Not assigned"),
57+
(:Lu, 1, :UpperCase, Cat.Upper, "Letter, uppercase"),
58+
(:Ll, 2, :LowerCase, Cat.Letter, "Letter, lowercase"),
59+
(:Lt, 3, :TitleCase, Cat.Upper, "Letter, titlecase"),
60+
(:Lm, 4, :ModifierLetter, Cat.Letter, "Letter, modifier"),
61+
(:Lo, 5, :OtherLetter, Cat.Letter, "Letter, other"),
62+
(:Mn, 6, :NonSpacingMark, Cat.Mark, "Mark, nonspacing"),
63+
(:Mc, 7, :CombiningMark, Cat.Mark, "Mark, spacing combining"),
64+
(:Me, 8, :EnclosingMark, Cat.Mark, "Mark, enclosing"),
65+
(:Nd, 9, :DecimalDigit, Cat.Number, "Number, decimal digit"),
66+
(:Nl, 10, :NumericLetter, Cat.Number, "Number, letter"),
67+
(:No, 11, :OtherNumber, Cat.Number, "Number, other"),
68+
(:Pc, 12, :ConnectorPunct, Cat.Punctuation, "Punctuation, connector"),
69+
(:Pd, 13, :DashPunct, Cat.Punctuation, "Punctuation, dash"),
70+
(:Ps, 14, :OpenPunct, Cat.Punctuation, "Punctuation, open"),
71+
(:Pe, 15, :ClosePunct, Cat.Punctuation, "Punctuation, close"),
72+
(:Pi, 16, :BegQuotePunct, Cat.Punctuation, "Punctuation, initial quote"),
73+
(:Pf, 17, :EndQuotePunct, Cat.Punctuation, "Punctuation, final quote"),
74+
(:Po, 18, :OtherPunct, Cat.Punctuation, "Punctuation, other"),
75+
(:Sm, 19, :MathSymbol, Cat.Symbol, "Symbol, math"),
76+
(:Sc, 20, :CurrencySymbol, Cat.Symbol, "Symbol, currency"),
77+
(:Sk, 21, :ModifierSymbol, Cat.Symbol, "Symbol, modifier"),
78+
(:So, 22, :OtherSymbol, Cat.Symbol, "Symbol, other"),
79+
(:Zs, 23, :SpaceSeparator, Cat.Separator, "Separator, space"),
80+
(:Zl, 24, :LineSeparator, Cat.Separator, "Separator, line"),
81+
(:Zp, 25, :ParagraphSeparator, Cat.Separator, "Separator, paragraph"),
82+
(:Cc, 26, :ControlChar, Cat.Other, "Other, control"),
83+
(:Cf, 27, :FormatChar, Cat.Other, "Other, format"),
84+
(:Cs, 28, :SurrogateChar, Cat.Other, "Other, surrogate"),
85+
(:Co, 29, :PrivateUseChar, Cat.Other, "Other, private use"))
86+
@eval const global $nam = CharCode($val)
8087
@eval export $cat
8188
@eval abstract $cat <: $typ
8289
@eval @doc $(string("Unicode Category Code: ",des)) $nam
@@ -94,54 +101,53 @@ const c2t = [NotAssignedChar, UpperCase, LowerCase, TitleCase, ModifierLetter, O
94101

95102
############################################################################
96103

97-
98104
"""
99105
Return various Unicode properties for character
100106
"""
101107
function charprop end
102108

103-
charprop(::Type{CharCategory}, c) = c2t[Int(charprop(CharCategoryCode, c))+1]
109+
charprop(::Type{CharType}, c) = c2t[Int(charprop(CharCode, c))+1]
104110

105-
is_assigned_char(c) = charprop(CharCategoryCode, c) != Cn
111+
is_assigned_char(c) = charprop(CharCode, c) != Cn
106112

107113
## libc character class predicates ##
108114

109-
islower(c::Char) = charprop(CharCategoryCode, c) == Ll
115+
islower(c::Char) = charprop(CharCode, c) == Ll
110116

111117
# true for Unicode uppercase (Lu) and titlecase (Lt) letters
112-
isupper(c::Char) = (ccode = charprop(CharCategoryCode, c)) == Lu || ccode == Lt
118+
isupper(c::Char) = (ccode = charprop(CharCode, c)) == Lu || ccode == Lt
113119

114120
isdigit(c::Char) = ('0' <= c <= '9')
115-
isalpha(c::Char) = (Lu <= charprop(CharCategoryCode, c) <= Lo)
116-
isnumber(c::Char) = (Nd <= charprop(CharCategoryCode, c) <= No)
117-
isalnum(c::Char) = (Lu <= (ccode = charprop(CharCategoryCode, c)) <= Lo) || (Nd <= ccode <= No)
121+
isalpha(c::Char) = (Lu <= charprop(CharCode, c) <= Lo)
122+
isnumber(c::Char) = (Nd <= charprop(CharCode, c) <= No)
123+
isalnum(c::Char) = (Lu <= (ccode = charprop(CharCode, c)) <= Lo) || (Nd <= ccode <= No)
118124

119125
# These are about 3 times slower, because the subtype check (`<:`)
120126
# is much slower than checking if an integer is within range (or two ranges)
121127
# If that is sped up, then these, which are more readable, could replace the other forms.
122128
#=
123-
isalpha(c::Char) = charprop(CharCategory, c) <: CatLetter
124-
isnumber(c::Char) = charprop(CharCategory, c) <: CatNumber
125-
isupper(c::Char) = charprop(CharCategory, c) <: CatUpper
126-
isalnum(c::Char) = charprop(CharCategory, c) <: Union{CatLetter, CatNumber}
127-
ispunct(c::Char) = charprop(CharCategory, c) <: CatPunctuation
129+
isalpha(c::Char) = charprop(CharType, c) <: Cat.Letter
130+
isnumber(c::Char) = charprop(CharType, c) <: Cat.Number
131+
isupper(c::Char) = charprop(CharType, c) <: Cat.Upper
132+
isalnum(c::Char) = charprop(CharType, c) <: Union{Cat.Letter, Cat.Number}
133+
ispunct(c::Char) = charprop(CharType, c) <: Cat.Punctuation
128134
=#
129135

130136
# following C++, only control characters from the Latin-1 subset return true
131137
iscntrl(c::Char) = (c <= Char(0x1f) || Char(0x7f) <= c <= Char(0x9f))
132138

133-
ispunct(c::Char) = (Pc <= charprop(CharCategoryCode, c) <= Po)
139+
ispunct(c::Char) = (Pc <= charprop(CharCode, c) <= Po)
134140

135141
# \u85 is the Unicode Next Line (NEL) character
136142
# the check for \ufffd allows for branch removal on ASCIIStrings
137143
@inline isspace(c::Char) =
138144
(c == ' ' || '\t' <= c <='\r' || c == '\u85' ||
139-
('\ua0' <= c && c != '\ufffd' && charprop(CharCategoryCode, c) == Zs))
145+
('\ua0' <= c && c != '\ufffd' && charprop(CharCode, c) == Zs))
140146

141-
isprint(c::Char) = (Lu <= charprop(CharCategoryCode, c) <= Zs)
147+
isprint(c::Char) = (Lu <= charprop(CharCode, c) <= Zs)
142148

143149
# true in principle if a printer would use ink
144-
isgraph(c::Char) = (Lu <= charprop(CharCategoryCode, c) <= So)
150+
isgraph(c::Char) = (Lu <= charprop(CharCode, c) <= So)
145151

146152
for name = ("alnum", "alpha", "cntrl", "digit", "number", "graph",
147153
"lower", "print", "punct", "space", "upper")

base/unicode/utf8proc.jl

+2-3
Original file line numberDiff line numberDiff line change
@@ -81,9 +81,8 @@ uppercase(c::Char) = (isascii(c)
8181

8282
############################################################################
8383

84-
# returns CharCategoryCode (enum values 0:29) giving Unicode category
85-
charprop(::Type{CharCategoryCode}, c) =
86-
CharCategoryCode(ccall(:utf8proc_category, Cint, (UInt32,), c))
84+
# returns CharCode (values 0:29) giving Unicode category
85+
charprop(::Type{CharCode}, c) = CharCode(ccall(:utf8proc_category, Cint, (UInt32,), c))
8786

8887
############################################################################
8988

test/unicode/properties.jl

+2-2
Original file line numberDiff line numberDiff line change
@@ -143,9 +143,9 @@ end
143143

144144
# check handling of CN category constants
145145
let c_ll = 'β', c_cn = '\u038B'
146-
@test charprop(CharCategoryCode, c_ll) == Unicode.Ll
146+
@test charprop(CharCode, c_ll) == Unicode.Ll
147147
# check codepoint with category code CN
148-
@test charprop(CharCategoryCode, c_cn) == Unicode.Cn
148+
@test charprop(CharCode, c_cn) == Unicode.Cn
149149
end
150150

151151
# Make sure fastplus is called for coverage

0 commit comments

Comments
 (0)