Skip to content

Commit

Permalink
add AbstractChar supertype of Char
Browse files Browse the repository at this point in the history
  • Loading branch information
stevengj committed Mar 2, 2018
1 parent df489f0 commit 63e04bf
Show file tree
Hide file tree
Showing 40 changed files with 492 additions and 412 deletions.
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,9 @@ Library improvements
* The function `thisind(s::AbstractString, i::Integer)` returns the largest valid index
less than or equal to `i` in the string `s`, or `0` if no such index exists ([#24414]).

* `Char` is now a subtype of `AbstractChar`, and most of the functions that
take character arguments now accept any `AbstractChar`.

* `Irrational` is now a subtype of `AbstractIrrational` ([#24245]).

* Introduced the `empty` function, the functional pair to `empty!` which returns a new,
Expand Down
2 changes: 1 addition & 1 deletion base/arrayshow.jl
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ methods. By default returns a string of the same width as original with a
centered cdot, used in printing of structural zeros of structured matrices.
Accept keyword args `c` for alternate single character marker.
"""
function replace_with_centered_mark(s::AbstractString;c::Char = '⋅')
function replace_with_centered_mark(s::AbstractString;c::AbstractChar = '⋅')
N = length(s)
return join(setindex!([" " for i=1:N],string(c),ceil(Int,N/2)))
end
Expand Down
14 changes: 8 additions & 6 deletions base/boot.jl
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ export
Signed, Int, Int8, Int16, Int32, Int64, Int128,
Unsigned, UInt, UInt8, UInt16, UInt32, UInt64, UInt128,
# string types
Char, AbstractString, String, IO,
AbstractChar, Char, AbstractString, String, IO,
# errors
ErrorException, BoundsError, DivideError, DomainError, Exception,
InterruptException, InexactError, OutOfMemoryError, ReadOnlyMemoryError,
Expand Down Expand Up @@ -177,7 +177,8 @@ primitive type Float32 <: AbstractFloat 32 end
primitive type Float64 <: AbstractFloat 64 end

#primitive type Bool <: Integer 8 end
primitive type Char 32 end
abstract type AbstractChar end
primitive type Char <: AbstractChar 32 end

primitive type Int8 <: Signed 8 end
#primitive type UInt8 <: Unsigned 8 end
Expand Down Expand Up @@ -460,7 +461,7 @@ function write(io::IO, x::String)
end

show(io::IO, @nospecialize x) = ccall(:jl_static_show, Cvoid, (Ptr{Cvoid}, Any), io_pointer(io), x)
print(io::IO, x::Char) = ccall(:jl_uv_putc, Cvoid, (Ptr{Cvoid}, Char), io_pointer(io), x)
print(io::IO, x::AbstractChar) = ccall(:jl_uv_putc, Cvoid, (Ptr{Cvoid}, Char), io_pointer(io), x)
print(io::IO, x::String) = (write(io, x); nothing)
print(io::IO, @nospecialize x) = show(io, x)
print(io::IO, @nospecialize(x), @nospecialize a...) = (print(io, x); print(io, a...))
Expand Down Expand Up @@ -701,9 +702,10 @@ UInt32(x::BuiltinInts) = toUInt32(x)::UInt32
UInt64(x::BuiltinInts) = toUInt64(x)::UInt64
UInt128(x::BuiltinInts) = toUInt128(x)::UInt128

Char(x::Number) = Char(UInt32(x))
Char(x::Char) = x
(::Type{T})(x::Char) where {T<:Number} = T(UInt32(x))
(::Type{T})(x::Number) where {T<:AbstractChar} = T(UInt32(x))
(::Type{AbstractChar})(x::Number) = Char(x)
(::Type{T})(x::AbstractChar) where {T<:Union{Number,AbstractChar}} = T(UInt32(x))
(::Type{T})(x::T) where {T<:AbstractChar} = x

(::Type{T})(x::T) where {T<:Number} = x

Expand Down
117 changes: 83 additions & 34 deletions base/char.jl
Original file line number Diff line number Diff line change
@@ -1,12 +1,49 @@
# This file is a part of Julia. License is MIT: https://julialang.org/license

struct InvalidCharError <: Exception
char::Char
"""
The `AbstractChar` type is the supertype of all character implementations
in Julia. A character represents a Unicode code point, and can be converted
to/from `UInt32` in order to obtain the numerical value of the code point.
These numerical values determine how characters are compared with `<` and `==`,
for example.
A given `AbstractChar` subtype may be capable of representing only a subset
of Unicode, in which case conversion from an unsupported `UInt32` value
may throw an error. Conversely, the built-in [`Char`](@ref) type represents
a *superset* of Unicode (in order to losslessly encode invalid byte streams),
in which case conversion of a non-Unicode value *to* `UInt32` throws an error.
The [`isvalid`](@ref) function can be used to check which codepoints are
representable in a given `AbstractChar` type.
Internally, an `AbstractChar` type may use a variety of encodings. Conversion
to `UInt32` will not reveal this encoding because it always returns the
Unicode value of the character. (Typically, the raw encoding can be obtained
via [`reinterpret`](@ref).)
"""
AbstractChar

"""
Char(c::Union{Number,AbstractChar})
`Char` is a 32-bit [`AbstractChar`](@ref) type that is the default representation
of characters in Julia. `Char` is the type used for character literals like `'x'`
and it is also the element type of [`String`](@ref).
In order to losslessly represent arbitrary byte streams stored in a `String`,
a `Char` value may store information that cannot be converted to a Unicode
codepoint — converting such a `Char` to `UInt32` will throw an error.
The [`isvalid(c::Char)`](@ref) function can be used to query whether `c`
represents a valid Unicode character.
"""
Char

struct InvalidCharError{T<:AbstractChar} <: Exception
char::T
end
struct CodePointError <: Exception
code::Integer
end
@noinline invalid_char(c::Char) = throw(InvalidCharError(c))
@noinline invalid_char(c::AbstractChar) = throw(InvalidCharError(c))
@noinline code_point_err(u::UInt32) = throw(CodePointError(u))

function ismalformed(c::Char)
Expand All @@ -24,6 +61,11 @@ function isoverlong(c::Char)
is_overlong_enc(u)
end

# fallback: other AbstractChar types, by default, are assumed
# not to support malformed or overlong encodings.
ismalformed(c::AbstractChar) = false
isoverlong(c::AbstractChar) = false

function UInt32(c::Char)
# TODO: use optimized inline LLVM
u = reinterpret(UInt32, c)
Expand Down Expand Up @@ -69,50 +111,57 @@ function Char(b::Union{Int8,UInt8})
0 ≤ b ≤ 0x7f ? reinterpret(Char, (b % UInt32) << 24) : Char(UInt32(b))
end

convert(::Type{Char}, x::Number) = Char(x)
convert(::Type{T}, x::Char) where {T<:Number} = T(x)
convert(::Type{AbstractChar}, x::Number) = Char(x) # default to Char
convert(::Type{T}, x::Number) where {T<:AbstractChar} = T(x)
convert(::Type{T}, x::AbstractChar) where {T<:Number} = T(x)

rem(x::Char, ::Type{T}) where {T<:Number} = rem(UInt32(x), T)
rem(x::AbstractChar, ::Type{T}) where {T<:Number} = rem(UInt32(x), T)

typemax(::Type{Char}) = reinterpret(Char, typemax(UInt32))
typemin(::Type{Char}) = reinterpret(Char, typemin(UInt32))

size(c::Char) = ()
size(c::Char,d) = convert(Int, d) < 1 ? throw(BoundsError()) : 1
ndims(c::Char) = 0
ndims(::Type{Char}) = 0
length(c::Char) = 1
firstindex(c::Char) = 1
lastindex(c::Char) = 1
getindex(c::Char) = c
getindex(c::Char, i::Integer) = i == 1 ? c : throw(BoundsError())
getindex(c::Char, I::Integer...) = all(x -> x == 1, I) ? c : throw(BoundsError())
first(c::Char) = c
last(c::Char) = c
eltype(::Type{Char}) = Char

start(c::Char) = false
next(c::Char, state) = (c, true)
done(c::Char, state) = state
isempty(c::Char) = false
in(x::Char, y::Char) = x == y
size(c::AbstractChar) = ()
size(c::AbstractChar,d) = convert(Int, d) < 1 ? throw(BoundsError()) : 1
ndims(c::AbstractChar) = 0
ndims(::Type{<:AbstractChar}) = 0
length(c::AbstractChar) = 1
firstindex(c::AbstractChar) = 1
lastindex(c::AbstractChar) = 1
getindex(c::AbstractChar) = c
getindex(c::AbstractChar, i::Integer) = i == 1 ? c : throw(BoundsError())
getindex(c::AbstractChar, I::Integer...) = all(x -> x == 1, I) ? c : throw(BoundsError())
first(c::AbstractChar) = c
last(c::AbstractChar) = c
eltype(::Type{T}) where {T<:AbstractChar} = T

start(c::AbstractChar) = false
next(c::AbstractChar, state) = (c, true)
done(c::AbstractChar, state) = state
isempty(c::AbstractChar) = false
in(x::AbstractChar, y::AbstractChar) = x == y

==(x::Char, y::Char) = reinterpret(UInt32, x) == reinterpret(UInt32, y)
isless(x::Char, y::Char) = reinterpret(UInt32, x) < reinterpret(UInt32, y)
hash(x::Char, h::UInt) =
hash_uint64(((reinterpret(UInt32, x) + UInt64(0xd4d64234)) << 32) ⊻ UInt64(h))
widen(::Type{Char}) = Char

-(x::Char, y::Char) = Int(x) - Int(y)
-(x::Char, y::Integer) = Char(Int32(x) - Int32(y))
+(x::Char, y::Integer) = Char(Int32(x) + Int32(y))
+(x::Integer, y::Char) = y + x
# fallbacks:
isless(x::AbstractChar, y::AbstractChar) = isless(Char(x), Char(y))
==(x::AbstractChar, y::AbstractChar) = Char(x) == Char(y)
hash(x::AbstractChar, h::UInt) =
hash_uint64(((UInt32(x) + UInt64(0xd060fad0)) << 32) ⊻ UInt64(h))
widen(::Type{T}) where {T<:AbstractChar} = T

-(x::AbstractChar, y::AbstractChar) = Int(x) - Int(y)
-(x::T, y::Integer) where {T<:AbstractChar} = T(Int32(x) - Int32(y))
+(x::T, y::Integer) where {T<:AbstractChar} = T(Int32(x) + Int32(y))
+(x::Integer, y::AbstractChar) = y + x

print(io::IO, c::Char) = (write(io, c); nothing)
print(io::IO, c::AbstractChar) = (write(io, c); nothing)

const hex_chars = UInt8['0':'9';'a':'z']

function show(io::IO, c::Char)
function show(io::IO, c::AbstractChar)
if c <= '\\'
b = c == '\0' ? 0x30 :
c == '\a' ? 0x61 :
Expand Down Expand Up @@ -154,14 +203,14 @@ function show(io::IO, c::Char)
return
end

function show(io::IO, ::MIME"text/plain", c::Char)
function show(io::IO, ::MIME"text/plain", c::T) where {T<:AbstractChar}
show(io, c)
if !ismalformed(c)
print(io, ": ")
if isoverlong(c)
print(io, "[overlong] ")
u = decode_overlong(c)
c = Char(u)
c = T(u)
else
u = UInt32(c)
end
Expand Down
2 changes: 1 addition & 1 deletion base/compiler/validation.jl
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ is_valid_lvalue(x) = isa(x, Slot) || isa(x, SSAValue) || isa(x, GlobalRef)
function is_valid_argument(x)
if isa(x, Slot) || isa(x, SSAValue) || isa(x, GlobalRef) || isa(x, QuoteNode) ||
(isa(x,Expr) && (x.head in (:static_parameter, :boundscheck, :copyast))) ||
isa(x, Number) || isa(x, AbstractString) || isa(x, Char) || isa(x, Tuple) ||
isa(x, Number) || isa(x, AbstractString) || isa(x, AbstractChar) || isa(x, Tuple) ||
isa(x, Type) || isa(x, Core.Box) || isa(x, Module) || x === nothing
return true
end
Expand Down
1 change: 1 addition & 0 deletions base/filesystem.jl
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@ function read(f::File, ::Type{Char})
end
return reinterpret(Char, c)
end
read(f::File, ::Type{T}) where {T<:AbstractChar} = T(read(f, Char)) # fallback

function unsafe_read(f::File, p::Ptr{UInt8}, nel::UInt)
check_open(f)
Expand Down
14 changes: 8 additions & 6 deletions base/io.jl
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ read(io::AbstractPipe, byte::Type{UInt8}) = read(pipe_reader(io), byte)
unsafe_read(io::AbstractPipe, p::Ptr{UInt8}, nb::UInt) = unsafe_read(pipe_reader(io), p, nb)
read(io::AbstractPipe) = read(pipe_reader(io))
readuntil(io::AbstractPipe, arg::UInt8; kw...) = readuntil(pipe_reader(io), arg; kw...)
readuntil(io::AbstractPipe, arg::Char; kw...) = readuntil(pipe_reader(io), arg; kw...)
readuntil(io::AbstractPipe, arg::AbstractChar; kw...) = readuntil(pipe_reader(io), arg; kw...)
readuntil(io::AbstractPipe, arg::AbstractString; kw...) = readuntil(pipe_reader(io), arg; kw...)
readuntil(io::AbstractPipe, arg::AbstractVector; kw...) = readuntil(pipe_reader(io), arg; kw...)
readuntil_vector!(io::AbstractPipe, target::AbstractVector, keep::Bool, out) = readuntil_vector!(pipe_reader(io), target, keep, out)
Expand Down Expand Up @@ -303,7 +303,7 @@ read!(filename::AbstractString, a) = open(io->read!(io, a), filename)
readuntil(filename::AbstractString, delim; keep::Bool = false)
Read a string from an I/O stream or a file, up to the given delimiter.
The delimiter can be a `UInt8`, `Char`, string, or vector.
The delimiter can be a `UInt8`, `AbstractChar`, string, or vector.
Keyword argument `keep` controls whether the delimiter is included in the result.
The text is assumed to be encoded in UTF-8.
Expand Down Expand Up @@ -570,6 +570,7 @@ function write(io::IO, c::Char)
n += 1
end
end
write(io::IO, c::AbstractChar) = write(io, Char(c)) # fallback

function write(io::IO, s::Symbol)
pname = unsafe_convert(Ptr{UInt8}, s)
Expand Down Expand Up @@ -627,12 +628,13 @@ function read(io::IO, ::Type{Char})
end
return reinterpret(Char, c)
end
read(io::IO, ::Type{T}) where {T<:AbstractChar} = T(read(io, Char)) # fallback

# readuntil_string is useful below since it has
# an optimized method for s::IOStream
readuntil_string(s::IO, delim::UInt8, keep::Bool) = String(readuntil(s, delim, keep=keep))

function readuntil(s::IO, delim::Char; keep::Bool=false)
function readuntil(s::IO, delim::AbstractChar; keep::Bool=false)
if delim ≤ '\x7f'
return readuntil_string(s, delim % UInt8, keep)
end
Expand Down Expand Up @@ -994,7 +996,7 @@ function skipchars(predicate, io::IO; linecomment=nothing)
end

"""
countlines(io::IO; eol::Char = '\\n')
countlines(io::IO; eol::AbstractChar = '\\n')
Read `io` until the end of the stream/file and count the number of lines. To specify a file
pass the filename as the first argument. EOL markers other than `'\\n'` are supported by
Expand All @@ -1017,7 +1019,7 @@ julia> countlines(io, eol = '.')
1
```
"""
function countlines(io::IO; eol::Char='\n')
function countlines(io::IO; eol::AbstractChar='\n')
isascii(eol) || throw(ArgumentError("only ASCII line terminators are supported"))
aeol = UInt8(eol)
a = Vector{UInt8}(uninitialized, 8192)
Expand All @@ -1034,4 +1036,4 @@ function countlines(io::IO; eol::Char='\n')
nl
end

countlines(f::AbstractString; eol::Char = '\n') = open(io->countlines(io, eol = eol), f)::Int
countlines(f::AbstractString; eol::AbstractChar = '\n') = open(io->countlines(io, eol = eol), f)::Int
2 changes: 1 addition & 1 deletion base/iterators.jl
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ first(r::Reverse) = last(r.itr) # and the last shall be first
reverse(R::AbstractRange) = Base.reverse(R) # copying ranges is cheap
reverse(G::Generator) = Generator(G.f, reverse(G.iter))
reverse(r::Reverse) = r.itr
reverse(x::Union{Number,Char}) = x
reverse(x::Union{Number,AbstractChar}) = x
reverse(p::Pair) = Base.reverse(p) # copying pairs is cheap

start(r::Reverse{<:Tuple}) = length(r.itr)
Expand Down
2 changes: 1 addition & 1 deletion base/parse.jl
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ julia> parse(Complex{Float64}, "3.2e-1 + 4.5im")
"""
parse(T::Type, str; base = Int)

function parse(::Type{T}, c::Char; base::Integer = 36) where T<:Integer
function parse(::Type{T}, c::AbstractChar; base::Integer = 36) where T<:Integer
a::Int = (base <= 36 ? 10 : 36)
2 <= base <= 62 || throw(ArgumentError("invalid base: base must be 2 ≤ base ≤ 62, got $base"))
d = '0' <= c <= '9' ? c-'0' :
Expand Down
2 changes: 1 addition & 1 deletion base/range.jl
Original file line number Diff line number Diff line change
Expand Up @@ -917,7 +917,7 @@ in(x::Integer, r::AbstractUnitRange{<:Integer}) = (first(r) <= x) & (x <= last(r
in(x::Real, r::AbstractRange{T}) where {T<:Integer} =
isinteger(x) && !isempty(r) && x >= minimum(r) && x <= maximum(r) &&
(mod(convert(T,x),step(r))-mod(first(r),step(r)) == 0)
in(x::Char, r::AbstractRange{Char}) =
in(x::AbstractChar, r::AbstractRange{<:AbstractChar}) =
!isempty(r) && x >= minimum(r) && x <= maximum(r) &&
(mod(Int(x) - Int(first(r)), step(r)) == 0)

Expand Down
4 changes: 2 additions & 2 deletions base/reduce.jl
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ reduce_empty(op, T) = _empty_reduce_error()
reduce_empty(::typeof(+), T) = zero(T)
reduce_empty(::typeof(+), ::Type{Bool}) = zero(Int)
reduce_empty(::typeof(*), T) = one(T)
reduce_empty(::typeof(*), ::Type{Char}) = ""
reduce_empty(::typeof(*), ::Type{<:AbstractChar}) = ""
reduce_empty(::typeof(&), ::Type{Bool}) = true
reduce_empty(::typeof(|), ::Type{Bool}) = false

Expand Down Expand Up @@ -307,7 +307,7 @@ different types than its inputs.
"""
reduce_first(op, x) = x
reduce_first(::typeof(+), x::Bool) = Int(x)
reduce_first(::typeof(*), x::Char) = string(x)
reduce_first(::typeof(*), x::AbstractChar) = string(x)

reduce_first(::typeof(add_sum), x) = reduce_first(+, x)
reduce_first(::typeof(add_sum), x::SmallSigned) = Int(x)
Expand Down
2 changes: 1 addition & 1 deletion base/shell.jl
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ function print_shell_escaped_posixly(io::IO, args::AbstractString...)
# that any (reasonable) shell will definitely never consider them to be special
have_single = false
have_double = false
function isword(c::Char)
function isword(c::AbstractChar)
if '0' <= c <= '9' || 'a' <= c <= 'z' || 'A' <= c <= 'Z'
# word characters
elseif c == '_' || c == '/' || c == '+' || c == '-'
Expand Down
4 changes: 2 additions & 2 deletions base/show.jl
Original file line number Diff line number Diff line change
Expand Up @@ -763,8 +763,8 @@ const expr_parens = Dict(:tuple=>('(',')'), :vcat=>('[',']'),

## AST decoding helpers ##

is_id_start_char(c::Char) = ccall(:jl_id_start_char, Cint, (UInt32,), c) != 0
is_id_char(c::Char) = ccall(:jl_id_char, Cint, (UInt32,), c) != 0
is_id_start_char(c::AbstractChar) = ccall(:jl_id_start_char, Cint, (UInt32,), c) != 0
is_id_char(c::AbstractChar) = ccall(:jl_id_char, Cint, (UInt32,), c) != 0
function isidentifier(s::AbstractString)
isempty(s) && return false
c, rest = Iterators.peel(s)
Expand Down
Loading

0 comments on commit 63e04bf

Please sign in to comment.