Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Base.transcode to replace utf8to16 and utf16to8 #16974

Merged
merged 1 commit into from
Jun 16, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions base/c.jl
Original file line number Diff line number Diff line change
Expand Up @@ -106,13 +106,15 @@ if is_windows()
function cwstring(s::AbstractString)
bytes = String(s).data
0 in bytes && throw(ArgumentError("embedded NULs are not allowed in C strings: $(repr(s))"))
return push!(utf8to16(bytes), 0)
return push!(transcode(UInt16, bytes), 0)
end
end

# conversions between UTF-8 and UTF-16 for Windows APIs
# transcoding data between UTF-8 and UTF-16 for Windows APIs

function utf8to16(src::Vector{UInt8})
# no-op method: when the source elements already have the requested code-unit
# type T, return `src` itself (the same array, not a copy)
transcode{T<:Union{UInt8,UInt16}}(::Type{T}, src::Vector{T}) = src

function transcode(::Type{UInt16}, src::Vector{UInt8})
dst = UInt16[]
i, n = 1, length(src)
n > 0 || return dst
Expand Down Expand Up @@ -162,7 +164,7 @@ function utf8to16(src::Vector{UInt8})
return dst
end

function utf16to8(src::Vector{UInt16})
function transcode(::Type{UInt8}, src::Vector{UInt16})
dst = UInt8[]
i, n = 1, length(src)
n > 0 || return dst
Expand Down
4 changes: 2 additions & 2 deletions base/env.jl
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ function access_env(onError::Function, str::AbstractString)
error(string("getenv: ", str, ' ', len, "-1 != ", ret, ": ", Libc.FormatMessage()))
end
pop!(val) # NUL
return String(utf16to8(val))
return String(transcode(UInt8, val))
end

function _setenv(svar::AbstractString, sval::AbstractString, overwrite::Bool=true)
Expand Down Expand Up @@ -97,7 +97,7 @@ function next(hash::EnvHash, block::Tuple{Ptr{UInt16},Ptr{UInt16}})
len = ccall(:wcslen, UInt, (Ptr{UInt16},), pos)
buf = Array{UInt16}(len)
unsafe_copy!(pointer(buf), pos, len)
env = String(utf16to8(buf))
env = String(transcode(UInt8, buf))
m = match(r"^(=?[^=]+)=(.*)$"s, env)
if m === nothing
error("malformed environment entry: $env")
Expand Down
4 changes: 2 additions & 2 deletions base/file.jl
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ function tempdir()
error("GetTempPath failed: $(Libc.FormatMessage())")
end
resize!(temppath,lentemppath)
return String(utf16to8(temppath))
return String(transcode(UInt8, temppath))
end
tempname(uunique::UInt32=UInt32(0)) = tempname(tempdir(), uunique)
const temp_prefix = cwstring("jl_")
Expand All @@ -216,7 +216,7 @@ function tempname(temppath::AbstractString,uunique::UInt32)
error("GetTempFileName failed: $(Libc.FormatMessage())")
end
resize!(tname,lentname)
return String(utf16to8(tname))
return String(transcode(UInt8, tname))
end
function mktemp(parent=tempdir())
filename = tempname(parent, UInt32(0))
Expand Down
4 changes: 2 additions & 2 deletions base/filesystem.jl
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ export File,
import Base:
UVError, _sizeof_uv_fs, check_open, close, eof, eventloop, fd, isopen,
nb_available, position, read, read!, readavailable, seek, seekend, show,
skip, stat, unsafe_read, unsafe_write, utf16to8, utf8to16, uv_error,
uvhandle, uvtype, write
skip, stat, unsafe_read, unsafe_write, transcode, uv_error, uvhandle,
uvtype, write

if is_windows()
import Base: cwstring
Expand Down
2 changes: 1 addition & 1 deletion base/interactiveutil.jl
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ elseif is_windows()
len = 0
while unsafe_load(plock, len+1) != 0; len += 1; end
# get Vector{UInt16}, transcode data to UTF-8, make a String of it
s = String(utf16to8(unsafe_wrap(Array, plock, len)))
s = String(transcode(UInt8, unsafe_wrap(Array, plock, len)))
systemerror(:GlobalUnlock, 0==ccall((:GlobalUnlock, "kernel32"), stdcall, Cint, (Ptr{UInt16},), plock))
return s
end
Expand Down
8 changes: 4 additions & 4 deletions base/libc.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@

module Libc

import Base: transcode

export FILE, TmStruct, strftime, strptime, getpid, gethostname, free, malloc, calloc, realloc,
errno, strerror, flush_cstdio, systemsleep, time
errno, strerror, flush_cstdio, systemsleep, time, transcode
if is_windows()
export GetLastError, FormatMessage
end

import Base: utf16to8

include(string(length(Core.ARGS)>=2?Core.ARGS[2]:"","errno_h.jl")) # include($BUILDROOT/base/errno_h.jl)

## RawFD ##
Expand Down Expand Up @@ -277,7 +277,7 @@ if is_windows()
buf = Array{UInt16}(len)
unsafe_copy!(pointer(buf), p, len)
ccall(:LocalFree,stdcall,Ptr{Void},(Ptr{Void},),p)
return String(utf16to8(buf))
return String(transcode(UInt8, buf))
end
end

Expand Down
4 changes: 2 additions & 2 deletions base/path.jl
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ function realpath(path::AbstractString)
systemerror(:realpath, n == 0)
x = n < length(buf) # is the buffer big enough?
resize!(buf, n) # shrink if x, grow if !x
x && return String(utf16to8(buf))
x && return String(transcode(UInt8, buf))
end
end

Expand All @@ -150,7 +150,7 @@ function longpath(path::AbstractString)
systemerror(:longpath, n == 0)
x = n < length(buf) # is the buffer big enough?
resize!(buf, n) # shrink if x, grow if !x
x && return String(utf16to8(buf))
x && return String(transcode(UInt8, buf))
end
end

Expand Down
24 changes: 12 additions & 12 deletions test/misc.jl
Original file line number Diff line number Diff line change
Expand Up @@ -209,11 +209,11 @@ whos(IOBuffer(), Tmp14173) # warm up
@test @allocated(whos(IOBuffer(), Tmp14173)) < 10000

## test transcoding between UTF-8 and UTF-16, in both directions (for Windows APIs)
import Base: utf8to16, utf16to8
import Base.Libc: transcode

# empty arrays
@test utf8to16(UInt8[]) == UInt16[]
@test utf16to8(UInt16[]) == UInt8[]
@test transcode(UInt16, UInt8[]) == UInt16[]
@test transcode(UInt8, UInt16[]) == UInt8[]

# UTF-8-like sequences
V8 = [
Expand Down Expand Up @@ -304,15 +304,15 @@ I8 = [(s,map(UInt16,s)) for s in X8]

for (X,Y,Z) in ((V8,V8,V8), (I8,V8,I8), (V8,I8,V8), (V8,V8,I8), (I8,V8,V8))
for (a8, a16) in X
@test utf8to16(a8) == a16
@test transcode(UInt16, a8) == a16
for (b8, b16) in Y
ab8 = [a8; b8]
ab16 = [a16; b16]
@test utf8to16(ab8) == ab16
@test transcode(UInt16, ab8) == ab16
for (c8, c16) in Z
abc8 = [ab8; c8]
abc16 = [ab16; c16]
@test utf8to16(abc8) == abc16
@test transcode(UInt16, abc8) == abc16
end
end
end
Expand Down Expand Up @@ -359,18 +359,18 @@ I16 = [

for (X,Y,Z) in ((V16,V16,V16), (I16,V16,I16), (V16,I16,V16), (V16,V16,I16), (I16,V16,V16))
for (a16, a8) in X
@test utf16to8(a16) == a8
@test utf8to16(a8) == a16
@test transcode(UInt8, a16) == a8
@test transcode(UInt16, a8) == a16
for (b16, b8) in Y
ab16 = [a16; b16]
ab8 = [a8; b8]
@test utf16to8(ab16) == ab8
@test utf8to16(ab8) == ab16
@test transcode(UInt8, ab16) == ab8
@test transcode(UInt16, ab8) == ab16
for (c16, c8) in Z
abc16 = [ab16; c16]
abc8 = [ab8; c8]
@test utf16to8(abc16) == abc8
@test utf8to16(abc8) == abc16
@test transcode(UInt8, abc16) == abc8
@test transcode(UInt16, abc8) == abc16
end
end
end
Expand Down