Skip to content

Commit

Permalink
Merge pull request #19449 from JuliaLang/jb/fasterstring
Browse files Browse the repository at this point in the history
faster `String` allocation
  • Loading branch information
JeffBezanson authored Jan 8, 2017
2 parents e55cdea + 8c687da commit cfac61d
Show file tree
Hide file tree
Showing 58 changed files with 808 additions and 568 deletions.
8 changes: 8 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,13 @@ Breaking changes

This section lists changes that do not have deprecation warnings.

* `String`s no longer have a `.data` field (as part of a significant performance
improvement). Use `Vector{UInt8}(str)` to access a string as a byte array.
However, allocating the `Vector` object has overhead. You can also use
`codeunit(str, i)` to access the `i`th byte of a `String`.
Use `sizeof(str)` instead of `length(str.data)`, and `pointer(str)` instead of
`pointer(str.data)`. ([#19449])

* Operations between `Float16` and `Integer`s now return `Float16` instead of `Float32`. ([#17261])

* Keyword arguments are processed left-to-right: if the same keyword is specified more than
Expand Down Expand Up @@ -818,6 +825,7 @@ Language tooling improvements
[#19233]: https://github.com/JuliaLang/julia/issues/19233
[#19288]: https://github.com/JuliaLang/julia/issues/19288
[#19305]: https://github.com/JuliaLang/julia/issues/19305
[#19449]: https://github.com/JuliaLang/julia/issues/19449
[#19469]: https://github.com/JuliaLang/julia/issues/19469
[#19543]: https://github.com/JuliaLang/julia/issues/19543
[#19598]: https://github.com/JuliaLang/julia/issues/19598
Expand Down
2 changes: 1 addition & 1 deletion base/LineEdit.jl
Original file line number Diff line number Diff line change
Expand Up @@ -437,7 +437,7 @@ function splice_buffer!{T<:Integer}(buf::IOBuffer, r::UnitRange{T}, ins::Abstrac
elseif pos > last(r)
seek(buf, pos - length(r))
end
splice!(buf.data, r + 1, ins.data) # position(), etc, are 0-indexed
splice!(buf.data, r + 1, Vector{UInt8}(ins)) # position(), etc, are 0-indexed
buf.size = buf.size + sizeof(ins) - length(r)
seek(buf, position(buf) + sizeof(ins))
end
Expand Down
37 changes: 19 additions & 18 deletions base/boot.jl
Original file line number Diff line number Diff line change
Expand Up @@ -182,8 +182,6 @@ else
typealias UInt UInt32
end

abstract AbstractString

function Typeof end
(f::typeof(Typeof))(x::ANY) = isa(x,Type) ? Type{x} : typeof(x)

Expand All @@ -192,12 +190,19 @@ type ErrorException <: Exception
msg::AbstractString
ErrorException(msg::AbstractString) = new(msg)
end

Expr(args::ANY...) = _expr(args...)

# Expands to `Expr(:meta, :noinline)`, the metadata expression placed at the
# start of a method body to request that the method not be inlined.
macro _noinline_meta()
    return Expr(:meta, :noinline)
end

immutable BoundsError <: Exception
a::Any
i::Any
BoundsError() = new()
BoundsError(a::ANY) = new(a)
BoundsError(a::ANY, i::ANY) = new(a,i)
BoundsError(a::ANY) = (@_noinline_meta; new(a))
BoundsError(a::ANY, i) = (@_noinline_meta; new(a,i))
end
immutable DivideError <: Exception end
immutable DomainError <: Exception end
Expand All @@ -221,11 +226,7 @@ end

abstract DirectIndexString <: AbstractString

immutable String <: AbstractString
data::Array{UInt8,1}
# required to make String("foo") work (#15120):
String(d::Array{UInt8,1}) = new(d)
end
String(s::String) = s # no constructor yet

# This should always be inlined
getptls() = ccall(:jl_get_ptls_states, Ptr{Void}, ())
Expand Down Expand Up @@ -278,8 +279,6 @@ immutable VecElement{T}
end
VecElement{T}(arg::T) = VecElement{T}(arg)

Expr(args::ANY...) = _expr(args...)

# used by lowering of splicing unquote
splicedexpr(hd::Symbol, args::Array{Any,1}) = (e=Expr(hd); e.args=args; e)

Expand Down Expand Up @@ -338,16 +337,18 @@ Array{T}(::Type{T}, m::Int) = Array{T,1}(m)
Array{T}(::Type{T}, m::Int,n::Int) = Array{T,2}(m,n)
Array{T}(::Type{T}, m::Int,n::Int,o::Int) = Array{T,3}(m,n,o)


# primitive Symbol constructors
Symbol(s::String) = Symbol(s.data)
# Build a Symbol from the bytes of `s`: the data pointer obtained from
# `jl_string_ptr` and the byte count `sizeof(s)` are handed to `jl_symbol_n`.
function Symbol(s::String)
return ccall(:jl_symbol_n, Ref{Symbol}, (Ptr{UInt8}, Int),
ccall(:jl_string_ptr, Ptr{UInt8}, (Any,), s),
sizeof(s))
end
function Symbol(a::Array{UInt8,1})
return ccall(:jl_symbol_n, Ref{Symbol}, (Ptr{UInt8}, Int),
ccall(:jl_array_ptr, Ptr{UInt8}, (Any,), a),
Intrinsics.arraylen(a))
ccall(:jl_array_ptr, Ptr{UInt8}, (Any,), a),
Intrinsics.arraylen(a))
end


# docsystem basics
macro doc(x...)
atdoc(x...)
Expand Down Expand Up @@ -378,8 +379,8 @@ unsafe_write(io::IO, x::Ptr{UInt8}, nb::Int) =
write(io::IO, x::UInt8) =
(ccall(:jl_uv_putb, Void, (Ptr{Void}, UInt8), io_pointer(io), x); 1)
function write(io::IO, x::String)
nb = sizeof(x.data)
unsafe_write(io, ccall(:jl_array_ptr, Ptr{UInt8}, (Any,), x.data), nb)
nb = sizeof(x)
unsafe_write(io, ccall(:jl_string_ptr, Ptr{UInt8}, (Any,), x), nb)
return nb
end

Expand Down
18 changes: 6 additions & 12 deletions base/c.jl
Original file line number Diff line number Diff line change
Expand Up @@ -73,21 +73,15 @@ pointer(p::Cwstring) = convert(Ptr{Cwchar_t}, p)
==(x::Union{Cstring,Cwstring}, y::Ptr) = pointer(x) == y
==(x::Ptr, y::Union{Cstring,Cwstring}) = x == pointer(y)

# here, not in pointer.jl, to avoid bootstrapping problems in coreimg.jl
unsafe_wrap(::Type{String}, p::Cstring, own::Bool=false) = unsafe_wrap(String, convert(Ptr{UInt8}, p), own)
unsafe_wrap(::Type{String}, p::Cstring, len::Integer, own::Bool=false) =
unsafe_wrap(String, convert(Ptr{UInt8}, p), len, own)
unsafe_string(s::Cstring) = unsafe_string(convert(Ptr{UInt8}, s))

# convert strings to String etc. to pass as pointers
cconvert(::Type{Cstring}, s::String) =
ccall(:jl_array_cconvert_cstring, Ref{Vector{UInt8}},
(Vector{UInt8},), s.data)
cconvert(::Type{Cstring}, s::String) = s
cconvert(::Type{Cstring}, s::AbstractString) =
cconvert(Cstring, String(s)::String)

function cconvert(::Type{Cwstring}, s::AbstractString)
v = transcode(Cwchar_t, String(s).data)
v = transcode(Cwchar_t, Vector{UInt8}(String(s)))
!isempty(v) && v[end] == 0 || push!(v, 0)
return v
end
Expand All @@ -100,7 +94,7 @@ containsnul(p::Ptr, len) =
containsnul(s::String) = containsnul(unsafe_convert(Ptr{Cchar}, s), sizeof(s))
containsnul(s::AbstractString) = '\0' in s

function unsafe_convert(::Type{Cstring}, s::Vector{UInt8})
function unsafe_convert(::Type{Cstring}, s::Union{String,Vector{UInt8}})
p = unsafe_convert(Ptr{Cchar}, s)
containsnul(p, sizeof(s)) &&
throw(ArgumentError("embedded NULs are not allowed in C strings: $(repr(s))"))
Expand Down Expand Up @@ -133,7 +127,7 @@ same argument.
This is only available on Windows.
"""
function cwstring(s::AbstractString)
bytes = String(s).data
bytes = Vector{UInt8}(String(s))
0 in bytes && throw(ArgumentError("embedded NULs are not allowed in C strings: $(repr(s))"))
return push!(transcode(UInt16, bytes), 0)
end
Expand Down Expand Up @@ -170,7 +164,7 @@ function transcode{S<:Union{Int32,UInt32}}(::Type{UInt8}, src::Vector{S})
take!(buf)
end
transcode(::Type{String}, src::String) = src
transcode(T, src::String) = transcode(T, src.data)
transcode(T, src::String) = transcode(T, Vector{UInt8}(src))
transcode(::Type{String}, src) = String(transcode(UInt8, src))

function transcode(::Type{UInt16}, src::Vector{UInt8})
Expand Down Expand Up @@ -257,7 +251,7 @@ function transcode(::Type{UInt8}, src::Vector{UInt16})
a = src[i += 1]
end

dst = Array{UInt8}(m)
dst = StringVector(m)
a = src[1]
i, j = 1, 0
while true
Expand Down
20 changes: 6 additions & 14 deletions base/datafmt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ function readdlm_auto(input::AbstractString, dlm::Char, T::Type, eol::Char, auto
# TODO: It would be nicer to use String(a) without making a copy,
# but because the mmap'ed array is not NUL-terminated this causes
# jl_try_substrtod to segfault below.
return readdlm_string(String(copy(a)), dlm, T, eol, auto, optsd)
return readdlm_string(unsafe_string(pointer(a),length(a)), dlm, T, eol, auto, optsd)
else
return readdlm_string(readstring(input), dlm, T, eol, auto, optsd)
end
Expand All @@ -153,7 +153,7 @@ type DLMOffsets <: DLMHandler
offsets = Array{Array{Int,1}}(1)
offsets[1] = Array{Int}(offs_chunk_size)
thresh = ceil(min(typemax(UInt), Base.Sys.total_memory()) / sizeof(Int) / 5)
new(offsets, 1, thresh, length(sbuff.data))
new(offsets, 1, thresh, sizeof(sbuff))
end
end

Expand Down Expand Up @@ -220,7 +220,7 @@ end

_chrinstr(sbuff::String, chr::UInt8, startpos::Int, endpos::Int) =
(endpos >= startpos) && (C_NULL != ccall(:memchr, Ptr{UInt8},
(Ptr{UInt8}, Int32, Csize_t), pointer(sbuff.data)+startpos-1, chr, endpos-startpos+1))
(Ptr{UInt8}, Int32, Csize_t), pointer(sbuff)+startpos-1, chr, endpos-startpos+1))

function store_cell{T}(dlmstore::DLMStore{T}, row::Int, col::Int,
quoted::Bool, startpos::Int, endpos::Int)
Expand Down Expand Up @@ -463,17 +463,9 @@ function colval{T<:Char}(sbuff::String, startpos::Int, endpos::Int, cells::Array
end
colval(sbuff::String, startpos::Int, endpos::Int, cells::Array, row::Int, col::Int) = true

function dlm_parse{T,D}(dbuff::T, eol::D, dlm::D, qchar::D, cchar::D,
ign_adj_dlm::Bool, allow_quote::Bool, allow_comments::Bool,
skipstart::Int, skipblanks::Bool, dh::DLMHandler)
all_ascii = (D <: UInt8) || (isascii(eol) &&
isascii(dlm) &&
(!allow_quote || isascii(qchar)) &&
(!allow_comments || isascii(cchar)))
if T === String && all_ascii
return dlm_parse(dbuff.data, eol % UInt8, dlm % UInt8, qchar % UInt8, cchar % UInt8,
ign_adj_dlm, allow_quote, allow_comments, skipstart, skipblanks, dh)
end
function dlm_parse{D}(dbuff::String, eol::D, dlm::D, qchar::D, cchar::D,
ign_adj_dlm::Bool, allow_quote::Bool, allow_comments::Bool,
skipstart::Int, skipblanks::Bool, dh::DLMHandler)
ncols = nrows = col = 0
is_default_dlm = (dlm == invalid_dlm(D))
error_str = ""
Expand Down
9 changes: 9 additions & 0 deletions base/deepcopy.jl
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,15 @@ function deepcopy_internal(x::SimpleVector, stackdict::ObjectIdDict)
return y
end

# Deep-copy a `String` by copying its `sizeof(x)` bytes into a new string.
# `stackdict` records the copies made so far, so a string that was already
# copied maps to that same copy instead of being duplicated again.
function deepcopy_internal(x::String, stackdict::ObjectIdDict)
    haskey(stackdict, x) && return stackdict[x]
    dup = unsafe_string(pointer(x), sizeof(x))
    stackdict[x] = dup
    return dup
end

function deepcopy_internal(x::ANY, stackdict::ObjectIdDict)
T = typeof(x)::DataType
nf = nfields(T)
Expand Down
15 changes: 14 additions & 1 deletion base/deprecated.jl
Original file line number Diff line number Diff line change
Expand Up @@ -430,7 +430,7 @@ end
endn += 1
end
(endn > idx) && (endn -= 1)
splice!(a, idx:endn, invalids_as.data)
splice!(a, idx:endn, Vector{UInt8}(invalids_as))
l = length(a)
end
String(a)
Expand Down Expand Up @@ -1499,4 +1499,17 @@ end
# Calling promote_op is likely a bad idea, so deprecate its convenience wrapper promote_eltype_op
@deprecate promote_eltype_op(op, As...) promote_op(op, map(eltype, As)...)

# Deprecated: build a `String` from `len` bytes at `p`. Since `String` no
# longer wraps a byte array, the bytes are copied with `unsafe_string` rather
# than adopted in place.
#
# `own` keeps its former meaning of "the buffer at `p` is handed over": because
# the result is a copy, the buffer is freed right after copying so that it is
# not leaked. With `own == false` the memory at `p` is left untouched.
function unsafe_wrap(::Type{String}, p::Union{Ptr{UInt8},Ptr{Int8}}, len::Integer, own::Bool=false)
    Base.depwarn("unsafe_wrap(String, ...) is deprecated; use `unsafe_string` instead.", :unsafe_wrap)
    str = unsafe_string(p, len)
    # `str` no longer references `p`; release it if ownership was transferred.
    own && Libc.free(p)
    return str
end
# Length-less form: measure the data at `p` with C `strlen` (so it must be
# NUL-terminated), then defer to the explicit-length method.
unsafe_wrap(::Type{String}, p::Union{Ptr{UInt8},Ptr{Int8}}, own::Bool=false) =
unsafe_wrap(String, p, ccall(:strlen, Csize_t, (Ptr{UInt8},), p), own)
# `Cstring` forms: convert to `Ptr{UInt8}` and defer to the pointer methods.
unsafe_wrap(::Type{String}, p::Cstring, own::Bool=false) = unsafe_wrap(String, convert(Ptr{UInt8}, p), own)
unsafe_wrap(::Type{String}, p::Cstring, len::Integer, own::Bool=false) =
unsafe_wrap(String, convert(Ptr{UInt8}, p), len, own)

# End deprecations scheduled for 0.6
2 changes: 1 addition & 1 deletion base/error.jl
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ macro assert(ex, msgs...)
elseif !isempty(msgs) && (isa(msg, Expr) || isa(msg, Symbol))
# message is an expression needing evaluating
msg = :(Main.Base.string($(esc(msg))))
elseif isdefined(Main, :Base) && isdefined(Main.Base, :string)
elseif isdefined(Main, :Base) && isdefined(Main.Base, :string) && applicable(Main.Base.string, msg)
msg = Main.Base.string(msg)
else
# string() might not be defined during bootstrap
Expand Down
1 change: 1 addition & 0 deletions base/exports.jl
Original file line number Diff line number Diff line change
Expand Up @@ -778,6 +778,7 @@ export
chomp,
chop,
chr2ind,
codeunit,
dec,
digits,
digits!,
Expand Down
5 changes: 2 additions & 3 deletions base/expr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,8 @@ Generates a symbol which will not conflict with other variable names.
"""
gensym() = ccall(:jl_gensym, Ref{Symbol}, ())

gensym(s::String) = gensym(s.data)
gensym(a::Array{UInt8,1}) =
ccall(:jl_tagged_gensym, Ref{Symbol}, (Ptr{UInt8}, Int32), a, length(a))
gensym(s::String) = ccall(:jl_tagged_gensym, Ref{Symbol}, (Ptr{UInt8}, Int32), s, sizeof(s))

gensym(ss::String...) = map(gensym, ss)
gensym(s::Symbol) =
ccall(:jl_tagged_gensym, Ref{Symbol}, (Ptr{UInt8}, Int32), s, ccall(:strlen, Csize_t, (Ptr{UInt8},), s))
Expand Down
5 changes: 4 additions & 1 deletion base/fft/FFTW.jl
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,10 @@ sprint_plan_{T<:fftwDouble}(plan::FFTWPlan{T}) =
sprint_plan_{T<:fftwSingle}(plan::FFTWPlan{T}) =
ccall((:fftwf_sprint_plan,libfftwf), Ptr{UInt8}, (PlanPtr,), plan)
function sprint_plan(plan::FFTWPlan)
unsafe_wrap(String, sprint_plan_(plan), true)
p = sprint_plan_(plan)
str = unsafe_string(p)
Libc.free(p)
return str
end

function show{T,K,inplace}(io::IO, p::cFFTWPlan{T,K,inplace})
Expand Down
4 changes: 2 additions & 2 deletions base/filesystem.jl
Original file line number Diff line number Diff line change
Expand Up @@ -173,9 +173,9 @@ function readbytes!(f::File, b::Array{UInt8}, nb=length(b))
uv_error("read",ret)
return ret
end
read(io::File) = read!(io, Array{UInt8}(nb_available(io)))
read(io::File) = read!(io, Base.StringVector(nb_available(io)))
readavailable(io::File) = read(io)
read(io::File, nb::Integer) = read!(io, Array{UInt8}(min(nb, nb_available(io))))
read(io::File, nb::Integer) = read!(io, Base.StringVector(min(nb, nb_available(io))))

const SEEK_SET = Int32(0)
const SEEK_CUR = Int32(1)
Expand Down
6 changes: 4 additions & 2 deletions base/gmp.jl
Original file line number Diff line number Diff line change
Expand Up @@ -532,8 +532,10 @@ hex(n::BigInt, pad::Int) = base(16, n, pad)

function base(b::Integer, n::BigInt)
2 <= b <= 62 || throw(ArgumentError("base must be 2 ≤ base ≤ 62, got $b"))
p = ccall((:__gmpz_get_str,:libgmp), Ptr{UInt8}, (Ptr{UInt8}, Cint, Ptr{BigInt}), C_NULL, b, &n)
unsafe_wrap(String, p, true)
nd = ndigits(n, b)
str = Base._string_n(n < 0 ? nd+1 : nd)
ccall((:__gmpz_get_str,:libgmp), Ptr{UInt8}, (Ptr{UInt8}, Cint, Ptr{BigInt}), str, b, &n)
return str
end

function base(b::Integer, n::BigInt, pad::Integer)
Expand Down
2 changes: 2 additions & 0 deletions base/interactiveutil.jl
Original file line number Diff line number Diff line change
Expand Up @@ -773,6 +773,8 @@ function summarysize(obj::Array, seen, excl)
return size
end

# Size estimate for a `String`: `sizeof(Int)` plus its `sizeof(s)` data bytes.
# NOTE(review): the `sizeof(Int)` term presumably accounts for a length header — confirm.
# `seen` and `excl` are not used by this method.
summarysize(s::String, seen, excl) = sizeof(Int) + sizeof(s)

function summarysize(obj::SimpleVector, seen, excl)
key = pointer_from_objref(obj)
haskey(seen, key) ? (return 0) : (seen[key] = true)
Expand Down
10 changes: 5 additions & 5 deletions base/intfuncs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,7 @@ string(x::Union{Int8,Int16,Int32,Int64,Int128}) = dec(x)

function bin(x::Unsigned, pad::Int, neg::Bool)
i = neg + max(pad,sizeof(x)<<3-leading_zeros(x))
a = Array{UInt8}(i)
a = StringVector(i)
while i > neg
a[i] = '0'+(x&0x1)
x >>= 1
Expand All @@ -389,7 +389,7 @@ end

function oct(x::Unsigned, pad::Int, neg::Bool)
i = neg + max(pad,div((sizeof(x)<<3)-leading_zeros(x)+2,3))
a = Array{UInt8}(i)
a = StringVector(i)
while i > neg
a[i] = '0'+(x&0x7)
x >>= 3
Expand All @@ -401,7 +401,7 @@ end

function dec(x::Unsigned, pad::Int, neg::Bool)
i = neg + max(pad,ndigits0z(x))
a = Array{UInt8}(i)
a = StringVector(i)
while i > neg
a[i] = '0'+rem(x,10)
x = oftype(x,div(x,10))
Expand All @@ -413,7 +413,7 @@ end

function hex(x::Unsigned, pad::Int, neg::Bool)
i = neg + max(pad,(sizeof(x)<<1)-(leading_zeros(x)>>2))
a = Array{UInt8}(i)
a = StringVector(i)
while i > neg
d = x & 0xf
a[i] = '0'+d+39*(d>9)
Expand All @@ -433,7 +433,7 @@ function base(b::Int, x::Unsigned, pad::Int, neg::Bool)
2 <= b <= 62 || throw(ArgumentError("base must be 2 ≤ base ≤ 62, got $b"))
digits = b <= 36 ? base36digits : base62digits
i = neg + max(pad,ndigits0z(x,b))
a = Array{UInt8}(i)
a = StringVector(i)
while i > neg
a[i] = digits[1+rem(x,b)]
x = div(x,b)
Expand Down
Loading

0 comments on commit cfac61d

Please sign in to comment.