Skip to content

Commit

Permalink
Remove types for general categories
Browse files Browse the repository at this point in the history
  • Loading branch information
ScottPJones committed Dec 15, 2015
1 parent f24826e commit f1e6e8c
Showing 1 changed file with 93 additions and 99 deletions.
192 changes: 93 additions & 99 deletions base/unicode/properties.jl
Original file line number Diff line number Diff line change
Expand Up @@ -22,116 +22,110 @@ function charprop end

module Category

"""Unicode character category type"""
abstract General <: Unicode.Property

"""Unicode 'Letter' character category"""
abstract Letter <: General
"""Unicode 'Mark' character category"""
abstract Mark <: General
"""Unicode 'Number' character category"""
abstract Number <: General
"""Unicode 'Punctuation' character category"""
abstract Punctuation <: General
"""Unicode 'Symbol' character category"""
abstract Symbol <: General
"""Unicode 'Separator' character category"""
abstract Separator <: General
"""Unicode 'Other' character category"""
abstract Other <: General

"""Unicode uppercase & titlecase letters"""
abstract Upper <: Letter

"""Unicode alphabetic and numeric"""
typealias AlphaNumeric Union{Letter, Number}

"""Unicode character category code (0-29)"""
bitstype 8 Code

"""Unicode character category mask"""
typealias Mask UInt32

# Integer -> Code: keep only the low 8 bits and reinterpret them as a Code.
function Base.convert(::Type{Code}, x::Integer)
    return reinterpret(Code, rem(x, UInt8))
end

# Code -> any Integer type: go through the underlying UInt8 value.
function Base.convert{T<:Integer}(::Type{T}, x::Code)
    return convert(T, reinterpret(UInt8, x))
end

# Mixing a Code with an integer type promotes to that integer type.
Base.promote_rule{T<:Integer}(::Type{T}, ::Type{Code}) = T

# Ordering is defined on the UInt8 value of the code, both between two codes
# and between a code and a plain integer (in either argument order).
function Base.isless(x::Code, y::Code)
    return isless(UInt8(x), UInt8(y))
end
function Base.isless(x::Code, y::Integer)
    return isless(UInt8(x), y)
end
function Base.isless(x::Integer, y::Code)
    return isless(x, UInt8(y))
end

let c2t = DataType[]
for (nam, val, cat, typ, des) in
((:Cn, 0, :NotAssignedChar, :Other, "Other, Not assigned"),
(:Lu, 1, :UpperCase, :Upper, "Letter, uppercase"),
(:Ll, 2, :LowerCase, :Letter, "Letter, lowercase"),
(:Lt, 3, :TitleCase, :Upper, "Letter, titlecase"),
(:Lm, 4, :ModifierLetter, :Letter, "Letter, modifier"),
(:Lo, 5, :OtherLetter, :Letter, "Letter, other"),
(:Mn, 6, :NonSpacingMark, :Mark, "Mark, nonspacing"),
(:Mc, 7, :CombiningMark, :Mark, "Mark, spacing combining"),
(:Me, 8, :EnclosingMark, :Mark, "Mark, enclosing"),
(:Nd, 9, :DecimalDigit, :Number, "Number, decimal digit"),
(:Nl, 10, :NumericLetter, :Number, "Number, letter"),
(:No, 11, :OtherNumber, :Number, "Number, other"),
(:Pc, 12, :ConnectorPunctuation, :Punctuation, "Punctuation, connector"),
(:Pd, 13, :DashPunctuation, :Punctuation, "Punctuation, dash"),
(:Ps, 14, :OpenPunctuation, :Punctuation, "Punctuation, open"),
(:Pe, 15, :ClosePunctuation, :Punctuation, "Punctuation, close"),
(:Pi, 16, :InitialQuotePunctuation, :Punctuation, "Punctuation, initial quote"),
(:Pf, 17, :FinalQuotePunctuation, :Punctuation, "Punctuation, final quote"),
(:Po, 18, :OtherPunctuation, :Punctuation, "Punctuation, other"),
(:Sm, 19, :MathSymbol, :Symbol, "Symbol, math"),
(:Sc, 20, :CurrencySymbol, :Symbol, "Symbol, currency"),
(:Sk, 21, :ModifierSymbol, :Symbol, "Symbol, modifier"),
(:So, 22, :OtherSymbol, :Symbol, "Symbol, other"),
(:Zs, 23, :SpaceSeparator, :Separator, "Separator, space"),
(:Zl, 24, :LineSeparator, :Separator, "Separator, line"),
(:Zp, 25, :ParagraphSeparator, :Separator, "Separator, paragraph"),
(:Cc, 26, :ControlChar, :Other, "Other, control"),
(:Cf, 27, :FormatChar, :Other, "Other, format"),
(:Cs, 28, :SurrogateChar, :Other, "Other, surrogate"),
(:Co, 29, :PrivateUseChar, :Other, "Other, private use"))
@eval const global $nam = $(Code(val))
@eval abstract $cat <: $typ
@eval push!($c2t, $cat)
@eval Base.convert(::Type{Code}, ct::$cat) = $(Code(val))
@eval @doc $(string("Unicode Category Code: ",des)) $nam
@eval @doc $(string("Unicode Category Type: ",des)) $cat
end
@eval const global code2general = $c2t
"""Unicode character category mask"""
bitstype 32 Mask

# Integer -> Mask: keep only the low 32 bits and reinterpret them as a Mask.
function Base.convert(::Type{Mask}, x::Integer)
    return reinterpret(Mask, rem(x, UInt32))
end

# Mask -> any Integer type: go through the underlying UInt32 value.
function Base.convert{T<:Integer}(::Type{T}, x::Mask)
    return convert(T, reinterpret(UInt32, x))
end

# Mixing a Mask with an integer type promotes to that integer type.
Base.promote_rule{T<:Integer}(::Type{T}, ::Type{Mask}) = T

# Code -> Mask: the mask with only the bit numbered by the code set.
function Base.convert(::Type{Mask}, c::Code)
    return Mask(1 << Int(c))
end

# For every Unicode general category, define two constants bound to the same
# `Code` value: the two-letter abbreviation (e.g. `Lu`) and a descriptive name
# (e.g. `UpperCase`). Both get the same documentation text.
# The fourth column (major category name) is not used by the loop body.
for (abbr, num, longname, major, descr) in
    ((:Cn,  0, :NotAssignedChar, :Other, "Other, Not assigned"),
     (:Lu,  1, :UpperCase, :Upper, "Letter, uppercase"),
     (:Ll,  2, :LowerCase, :Letter, "Letter, lowercase"),
     (:Lt,  3, :TitleCase, :Upper, "Letter, titlecase"),
     (:Lm,  4, :ModifierLetter, :Letter, "Letter, modifier"),
     (:Lo,  5, :OtherLetter, :Letter, "Letter, other"),
     (:Mn,  6, :NonSpacingMark, :Mark, "Mark, nonspacing"),
     (:Mc,  7, :CombiningMark, :Mark, "Mark, spacing combining"),
     (:Me,  8, :EnclosingMark, :Mark, "Mark, enclosing"),
     (:Nd,  9, :DecimalDigit, :Number, "Number, decimal digit"),
     (:Nl, 10, :NumericLetter, :Number, "Number, letter"),
     (:No, 11, :OtherNumber, :Number, "Number, other"),
     (:Pc, 12, :ConnectorPunctuation, :Punctuation, "Punctuation, connector"),
     (:Pd, 13, :DashPunctuation, :Punctuation, "Punctuation, dash"),
     (:Ps, 14, :OpenPunctuation, :Punctuation, "Punctuation, open"),
     (:Pe, 15, :ClosePunctuation, :Punctuation, "Punctuation, close"),
     (:Pi, 16, :InitialQuotePunctuation, :Punctuation, "Punctuation, initial quote"),
     (:Pf, 17, :FinalQuotePunctuation, :Punctuation, "Punctuation, final quote"),
     (:Po, 18, :OtherPunctuation, :Punctuation, "Punctuation, other"),
     (:Sm, 19, :MathSymbol, :Symbol, "Symbol, math"),
     (:Sc, 20, :CurrencySymbol, :Symbol, "Symbol, currency"),
     (:Sk, 21, :ModifierSymbol, :Symbol, "Symbol, modifier"),
     (:So, 22, :OtherSymbol, :Symbol, "Symbol, other"),
     (:Zs, 23, :SpaceSeparator, :Separator, "Separator, space"),
     (:Zl, 24, :LineSeparator, :Separator, "Separator, line"),
     (:Zp, 25, :ParagraphSeparator, :Separator, "Separator, paragraph"),
     (:Cc, 26, :ControlChar, :Other, "Other, control"),
     (:Cf, 27, :FormatChar, :Other, "Other, format"),
     (:Cs, 28, :SurrogateChar, :Other, "Other, surrogate"),
     (:Co, 29, :PrivateUseChar, :Other, "Other, private use"))
    # Build the value and the doc text once; both names share them.
    local catcode = Code(num)
    local doctext = string("Unicode Category Code: ", descr)
    @eval const global $abbr = $catcode
    @eval const global $longname = $catcode
    @eval @doc $doctext $abbr
    @eval @doc $doctext $longname
end

#=
const c2t = [NotAssignedChar, UpperCase, LowerCase, TitleCase, ModifierLetter, OtherLetter,
NonSpacingMark, CombiningMark, EnclosingMark,
DecimalDigit, NumericLetter, OtherNumber,
ConnectorPunctuation, DashPunctuation, OpenPunctuation, ClosePunctuation,
InitialQuotePunctuation, FinalQuotePunctuation, OtherPunctuation,
MathSymbol, CurrencySymbol, ModifierSymbol, OtherSymbol,
SpaceSeparator, LineSeparator, ParagraphSeparator,
ControlChar, FormatChar, SurrogateChar, PrivateUseChar]
=#

Base.convert(::Type{General}, cat::Code) = code2general[Int(cat)+1]

# Return the category of `c` as a one-bit `Mask` (bit number = category code).
#
# The first argument must be dispatched on as `::Type{Mask}`. Writing it as a
# bare `Mask` declares an untyped positional argument that merely happens to
# be called `Mask`: that method would match *every* two-argument call to
# `charprop`, would shadow the `Mask` type inside its own body, and its inner
# `charprop(Code, c)` call could dispatch straight back to itself.
# The inner lookup is qualified the same way as the method being extended so
# that it resolves to the same generic function.
Unicode.charprop(::Type{Mask}, c) = Mask(1 << Int(Unicode.charprop(Code, c)))

# Bit selected by a single category code, as a plain integer.
_codebit(c::Code) = 1 << Int(c)

# Membership: a code is "in" a mask when its bit is set in that mask.
# `code & mask` performs the same test and likewise returns a Bool.
Base.in(c::Code, m::Mask) = (_codebit(c) & m) != 0
Base.&(c::Code, m::Mask) = (_codebit(c) & m) != 0
# Between two bare codes, membership is plain equality.
Base.in(x::Code, y::Code) = x == y

# Or-ing codes and/or masks together produces a Mask.
Base.|(x::Code, y::Code) = Mask(_codebit(x) | _codebit(y))
Base.|(c::Code, m::Mask) = Mask(_codebit(c) | m)
Base.|(m::Mask, c::Code) = (c | m)
Base.|(x::Mask, y::Mask) = Mask(UInt32(x) | UInt32(y))

# Combining a Mask with a plain integer converts the mask to Int first, so
# the result is an ordinary integer rather than a Mask.
Base.|(x::Integer, y::Mask) = x | Int(y)
Base.|(x::Mask, y::Integer) = Int(x) | y
Base.&(x::Integer, y::Mask) = x & Int(y)
Base.&(x::Mask, y::Integer) = Int(x) & y

# Masks for the Unicode major categories, plus the Lower/Upper case groupings.
# Each is the union (bitwise or) of the individual category codes it lists.

const Letter = Mask(Lu | Ll | Lt | Lm | Lo)
@doc """Unicode Major Category: Letter (Lu, Ll, Lt, Lm, Lo)""" Letter

const Mark = Mask(Mn | Mc | Me)
@doc """Unicode Major Category: Mark (Mn, Mc, Me)""" Mark

const Number = Mask(Nd | Nl | No)
@doc """Unicode Major Category: Number (Nd, Nl, No)""" Number

const Symbol = Mask(Sm | Sc | Sk | So)
@doc """Unicode Major Category: Symbol (Sm, Sc, Sk, So)""" Symbol

const Other = Mask(Cn | Cc | Cf | Cs | Co)
@doc """Unicode Major Category: Other (Cn, Cc, Cf, Cs, Co)""" Other

const Punctuation = Mask(Pc | Pd | Ps | Pe | Pi | Pf | Po)
@doc """Unicode Major Category: Punctuation (Pc, Pd, Ps, Pe, Pi, Pf, Po)""" Punctuation

const Separator = Mask(Zs | Zl | Zp)
@doc """Unicode Major Category: Separator (Zs, Zl, Zp)""" Separator

const Lower = Mask(Ll)
@doc """Unicode Category: Lower = LowerCase""" Lower

const Upper = Mask(Lu | Lt)
@doc """Unicode Combined Categories: Upper = UpperCase | TitleCase""" Upper

@eval const global UpperMask = $(Lu | Lt)
@eval const global AlphaMask = $(Lu | Ll | Lt | Lm | Lo)
@eval const global NumberMask = $(Nd | Nl | No)
@eval const global AlphaNumericMask = AlphaMask | NumberMask
# Letters and numbers combined into a single mask.
const AlphaNumeric = Letter | Number
@doc """Unicode Combined Categories: AlphaNumeric = Letter | Number""" AlphaNumeric

let mask = 0 ; for i = Int(Pc):Int(Po) ; mask |= (1<<i) ; end
@eval const global PunctuationMask = $(Mask(mask))
mask = 0 ; for i = Int(Lu):Int(So) ; mask |= (1<<i) ; end
@eval const global GraphMask = $(Mask(mask))
@eval const global PrintMask = $(Mask(mask) | Zs)
# Graph covers every category code from Lu through So (a contiguous range of
# code values); Print is Graph plus the space separator Zs.
let bits = 0
    for k in Int(Lu):Int(So)
        bits |= (1 << k)
    end
    @eval const global Graph = $(Mask(bits))
    @doc """Unicode Combined Categories: Graph (true if printer would use ink)""" Graph
    @eval const global Print = $(Mask(bits) | Zs)
    @doc """Unicode Combined Categories: Print""" Print
end

end # module Category
Expand All @@ -144,14 +138,14 @@ is_assigned_char(c) = charprop(Category.Code, c) != Category.Cn
# true when the character's category code is Ll (Letter, lowercase)
function islower(c::Char)
    return charprop(Category.Code, c) == Category.Ll
end

# true for Unicode upper and mixed case
isupper(c::Char) = charprop(Category.Code, c) & Category.UpperMask
isalpha(c::Char) = charprop(Category.Code, c) & Category.AlphaMask
isnumber(c::Char) = charprop(Category.Code, c) & Category.NumberMask
isalnum(c::Char) = charprop(Category.Code, c) & Category.AlphaNumericMask
ispunct(c::Char) = charprop(Category.Code, c) & Category.PunctuationMask
isprint(c::Char) = charprop(Category.Code, c) & Category.PrintMask
# Each predicate looks up the character's category code and tests whether it
# is a member of the corresponding category mask.
isupper(c::Char)  = in(charprop(Category.Code, c), Category.Upper)
isalpha(c::Char)  = in(charprop(Category.Code, c), Category.Letter)
isnumber(c::Char) = in(charprop(Category.Code, c), Category.Number)
isalnum(c::Char)  = in(charprop(Category.Code, c), Category.AlphaNumeric)
ispunct(c::Char)  = in(charprop(Category.Code, c), Category.Punctuation)
isprint(c::Char)  = in(charprop(Category.Code, c), Category.Print)
# true in principle if a printer would use ink
isgraph(c::Char) = charprop(Category.Code, c) & Category.GraphMask
isgraph(c::Char) = in(charprop(Category.Code, c), Category.Graph)

# true only for the ASCII decimal digits '0' through '9'
isdigit(c::Char) = c >= '0' && c <= '9'

Expand Down

0 comments on commit f1e6e8c

Please sign in to comment.