-
Notifications
You must be signed in to change notification settings - Fork 36
/
Copy pathconversion.jl
264 lines (246 loc) · 10.1 KB
/
conversion.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
import Core: Expr
# Terminals
# Per-codepoint callback used during identifier normalization.
# Maps two look-alike codepoints onto their Greek counterparts and leaves every
# other codepoint unchanged:
#   U+00B5 (micro sign)                   -> U+03BC (greek small letter mu)
#   U+025B (latin small letter open e)    -> U+03B5 (greek small letter epsilon)
# The second argument is accepted only to match the callback signature and is
# never read. The declared return type converts the result to `Int32`.
function julia_normalization_map(c::Int32, x::Ptr{Nothing})::Int32
    if c == 0x00B5
        return 0x03BC
    elseif c == 0x025B
        return 0x03B5
    end
    return c
end
# Note: This code should be in julia base
# Run `str` through utf8proc's custom decomposition with
# `julia_normalization_map` as the per-codepoint callback, then re-encode the
# result and return it as a new `String`.
#
# `options` is forwarded unchanged to both `utf8proc_decompose_custom` and
# `utf8proc_reencode`. Any negative return code from utf8proc is handed to
# `Base.Unicode.utf8proc_error`.
function utf8proc_map_custom(str::String, options)
    callback = @cfunction(julia_normalization_map, Int32, (Int32, Ptr{Nothing}))

    # First call: no output buffer (C_NULL, size 0); the returned count is only
    # used to size the buffer below.
    required = ccall(:utf8proc_decompose_custom, Int,
        (Ptr{UInt8}, Int, Ptr{UInt8}, Int, Cint, Ptr{Nothing}, Ptr{Nothing}),
        str, sizeof(str), C_NULL, 0, options, callback, C_NULL)
    if required < 0
        Base.Unicode.utf8proc_error(required)
    end

    # Four bytes are reserved per reported unit.
    out = Base.StringVector(required * 4)

    # Second call: same arguments, this time writing into `out`.
    written = ccall(:utf8proc_decompose_custom, Int,
        (Ptr{UInt8}, Int, Ptr{UInt8}, Int, Cint, Ptr{Nothing}, Ptr{Nothing}),
        str, sizeof(str), out, required, options, callback, C_NULL)
    if written < 0
        Base.Unicode.utf8proc_error(written)
    end

    # Re-encode in place; the result is the number of bytes to keep.
    bytecount = ccall(:utf8proc_reencode, Int, (Ptr{UInt8}, Int, Cint), out, written, options)
    if bytecount < 0
        Base.Unicode.utf8proc_error(bytecount)
    end

    return String(resize!(out, bytecount))
end
# Normalize identifier text: convert `str` to a `String` and pass it to
# `utf8proc_map_custom` with the STABLE and COMPOSE utf8proc option flags set.
function normalize_julia_identifier(str::AbstractString)
    return utf8proc_map_custom(
        String(str),
        Base.Unicode.UTF8PROC_STABLE | Base.Unicode.UTF8PROC_COMPOSE,
    )
end
# Turn the text of a prefixed unsigned literal into a value (or macrocall).
#
# `s` is the literal text including its two-character prefix; `b` is the number
# of bits contributed by each digit. The width is taken from the digit count,
# not the numeric value, so leading zeros widen the result.
#
# Returns a `UInt8`/`UInt16`/`UInt32`/`UInt64` for widths up to 64 bits, a
# `@uint128_str` macrocall expression for widths up to 128 bits, and a
# `@big_str` macrocall expression beyond that.
function sized_uint_literal(s::AbstractString, b::Integer)
    # The literal is known to be ASCII, so the byte count equals the character
    # count and `sizeof` is a cheap way to get the length of the string.
    nbits = (sizeof(s) - 2) * b
    if nbits <= 8
        return Base.parse(UInt8, s)
    elseif nbits <= 16
        return Base.parse(UInt16, s)
    elseif nbits <= 32
        return Base.parse(UInt32, s)
    elseif nbits <= 64
        return Base.parse(UInt64, s)
    elseif nbits <= 128
        # Older Julia versions refer to the macro by bare symbol.
        macroname = @static if VERSION >= v"1.1"
            GlobalRef(Core, Symbol("@uint128_str"))
        else
            Symbol("@uint128_str")
        end
        return Expr(:macrocall, macroname, nothing, s)
    else
        return Expr(:macrocall, GlobalRef(Core, Symbol("@big_str")), nothing, s)
    end
end
# Turn the text of an octal literal (`s`, including the `0o` prefix) into a
# value or macrocall expression.
#
# The result type is the smallest unsigned type whose digit budget and maximum
# value both accommodate the literal:
#   up to 3 digits and <= 0o377                      -> UInt8
#   up to 6 digits and <= 0o177777                   -> UInt16
#   up to 11 digits and <= 0o37777777777             -> UInt32
#   up to 22 digits and <= 0o1777777777777777777777  -> UInt64
#   up to 43 digits and <= 0o3777…7 (43 digits)      -> `@uint128_str` macrocall
#   anything larger                                  -> whatever `Meta.parse` yields
# The string comparisons are only evaluated when `s` has exactly the same length
# as the bound, where lexicographic order of ASCII digits equals numeric order.
#
# Throws `ArgumentError` (from `Base.parse`) if `s` has no digits after `0o`.
function sized_uint_oct_literal(s::AbstractString)
    # NOTE: an earlier guard here read `s[3] == 0`, comparing a `Char` with an
    # `Int`. That comparison is always false, so the guarded call to
    # `sized_uint_literal(s, 3)` never ran. It is removed rather than
    # "corrected" to `'0'`: enabling it would send every leading-zero literal
    # through the 3-bits-per-digit rule and change the types produced today
    # (for example "0o010" would become a UInt16 instead of a UInt8).
    len = sizeof(s)
    (len < 5 || (len == 5 && s <= "0o377")) && return Base.parse(UInt8, s)
    (len < 8 || (len == 8 && s <= "0o177777")) && return Base.parse(UInt16, s)
    (len < 13 || (len == 13 && s <= "0o37777777777")) && return Base.parse(UInt32, s)
    (len < 24 || (len == 24 && s <= "0o1777777777777777777777")) && return Base.parse(UInt64, s)
    if len < 45 || (len == 45 && s <= "0o3777777777777777777777777777777777777777777")
        # Older Julia versions refer to the macro by bare symbol.
        @static if VERSION >= v"1.1"
            return Expr(:macrocall, GlobalRef(Core, Symbol("@uint128_str")), nothing, s)
        else
            return Expr(:macrocall, Symbol("@uint128_str"), nothing, s)
        end
    end
    # Wider than 128 bits: defer to the reference parser.
    return Meta.parse(s)
end
# Convert a literal node `x` into the corresponding Julia value or expression.
# The checks run in a fixed order and the first match wins; a node that matches
# none of them yields `nothing`.
function _literal_expr(x)
    headof(x) === :TRUE && return true
    headof(x) === :FALSE && return false
    is_nothing(x) && return nothing

    # All integer flavours share one converter.
    if headof(x) === :INTEGER || headof(x) === :BININT || headof(x) === :HEXINT || headof(x) === :OCTINT
        return Expr_int(x)
    end

    isfloat(x) && return Expr_float(x)
    ischar(x) && return Expr_char(x)
    headof(x) === :MACRO && return Symbol(valof(x))

    # String literals are represented by their stored value directly.
    if headof(x) === :STRING || headof(x) === :TRIPLESTRING
        return valof(x)
    end

    headof(x) === :CMD && return Expr_cmd(x)
    headof(x) === :TRIPLECMD && return Expr_tcmd(x)
    return nothing
end
# Decimal text of the largest Int64 / Int128 values.
# `Int64` is named explicitly: `typemax(Int)` would yield the Int32 maximum on a
# 32-bit platform, which would contradict the constant's name.
const TYPEMAX_INT64_STR = string(typemax(Int64))
const TYPEMAX_INT128_STR = string(typemax(Int128))
# Convert an integer literal node into a value or expression.
# Underscore separators are stripped first. Literals longer than two bytes that
# start with `0x`, `0o` or `0b` go to the sized unsigned converters (4 bits per
# hex digit, 1 bit per binary digit, octal has its own rule); everything else is
# handed to `Meta.parse`.
function Expr_int(x)
    txt = replace(valof(x), "_" => "")
    if sizeof(txt) > 2 && txt[1] == '0'
        marker = txt[2]
        marker == 'x' && return sized_uint_literal(txt, 4)
        marker == 'o' && return sized_uint_oct_literal(txt)
        marker == 'b' && return sized_uint_literal(txt, 1)
    end
    return Meta.parse(txt)
end
# Convert a float literal node into a `Float32` or `Float64`.
# Text containing an `f` exponent marker (and not starting with `0x`) is parsed
# as `Float32` after rewriting `f` to `e`; all other text is parsed as
# `Float64`. Underscore separators are removed in both cases.
function Expr_float(x)
    if startswith(valof(x), "0x") || !('f' in valof(x))
        return Base.parse(Float64, replace(valof(x), "_" => ""))
    end
    with_e = replace(valof(x), 'f' => 'e')
    return Base.parse(Float32, replace(with_e, '_' => ""))
end
# Convert a character literal node into a `Char`.
# The first and last characters of the stored text (the delimiters) are dropped
# and the remainder is unescaped. Raises an error if the unescaped text is not
# exactly one character (and not exactly one byte).
function Expr_char(x)
    inner = valof(x)[2:prevind(valof(x), lastindex(valof(x)))]
    val = _unescape_string(inner)
    if sizeof(val) == 1
        # A single byte such as '\xff' may not be valid UTF-8; in that case the
        # raw byte value is used as the codepoint.
        return Char(codeunit(val, 1))
    end
    if length(val) != 1
        error("Invalid character literal: $(Vector{UInt8}(valof(x)))")
    end
    return val[1]
end
# Expressions
# Convert a CST node `x` into the equivalent Julia value / `Expr`.
# The checks below are ordered; the first one that matches decides the result.
function Expr(x::EXPR)
    # --- identifiers ---------------------------------------------------------
    if isidentifier(x)
        if headof(x) !== :NONSTDIDENTIFIER
            return Symbol(normalize_julia_identifier(valof(x)))
        end
        # Non-standard identifier: the name lives in the second child; an "@"
        # on the first child is carried over to the resulting symbol.
        has_at = startswith(valof(x.args[1]), "@")
        ident = normalize_julia_identifier(valof(x.args[2]))
        return has_at ? Symbol("@", ident) : Symbol(ident)
    end

    # --- keywords ------------------------------------------------------------
    if iskeyword(x)
        headof(x) === :BREAK && return Expr(:break)
        headof(x) === :CONTINUE && return Expr(:continue)
        return Symbol(lowercase(string(headof(x))))
    end

    # --- operators -----------------------------------------------------------
    isoperator(x) && return Symbol(valof(x))

    # --- punctuation ---------------------------------------------------------
    if ispunctuation(x)
        if headof(x) === :DOT
            x.args === nothing && return :(.)
            if length(x.args) == 1 && isoperator(x.args[1])
                return Expr(:(.), Expr(x.args[1]))
            end
        end
        # Any other punctuation only reaches this point in a malformed
        # expression.
        return Expr(:error)
    end

    # --- literals and brackets -----------------------------------------------
    isliteral(x) && return _literal_expr(x)
    isbracketed(x) && return Expr(x.args[1])

    # --- special heads -------------------------------------------------------
    x.head isa EXPR && return Expr(Expr(x.head), Expr.(x.args)...)
    x.head === :quotenode && return QuoteNode(Expr(x.args[1]))
    x.head === :globalrefdoc && return GlobalRef(Core, Symbol("@doc"))
    if x.head === :globalrefcmd
        return VERSION >= v"1.1" ? GlobalRef(Core, Symbol("@cmd")) : Symbol("@cmd")
    end

    # --- macro calls needing special treatment -------------------------------
    if x.head === :macrocall
        if is_getfield_w_quotenode(x.args[1]) && !ismacroname(x.args[1].args[2].args[1])
            # Shift the '@' from the left-hand side of the dotted name onto its
            # final component.
            tail = valof(x.args[1].args[2].args[1])
            tail = tail === nothing ? "" : tail
            shifted = Expr(:., remove_at(x.args[1].args[1]), QuoteNode(Symbol("@", tail)))
            return Expr(:macrocall, shifted, Expr.(x.args[2:end])...)
        end
        if isidentifier(x.args[1]) && valof(x.args[1]) == "@."
            return Expr(:macrocall, Symbol("@__dot__"), Expr.(x.args[2:end])...)
        end
        if length(x.args) == 3 && x.args[1].head === :globalrefcmd && x.args[3].head == :string
            # The third argument's `meta` field is passed through as-is.
            return Expr(:macrocall, Expr(x.args[1]), Expr(x.args[2]), x.args[3].meta)
        end
    end

    # --- interpolated strings with an empty leading segment ------------------
    if x.head === :string && length(x.args) > 0 &&
       (x.args[1].head === :STRING || x.args[1].head === :TRIPLESTRING) &&
       isempty(valof(x.args[1]))
        # The initial text section exists in the source but is empty in the
        # represented string (after lowest-common-prefix adjustment), so it is
        # left out of the result.
        return Expr(:string, Expr.(x.args[2:end])...)
    end

    # --- generic fallbacks ---------------------------------------------------
    # NOTE(review): the `args === nothing` check runs before the `:errortoken`
    # check, so an argument-less error token yields `Expr(:errortoken)` rather
    # than `Expr(:error)`. Confirm that is intended.
    x.args === nothing && return Expr(Symbol(lowercase(String(x.head))))
    x.head === :errortoken && return Expr(:error)
    return Expr(Symbol(lowercase(String(x.head))), Expr.(x.args)...)
end
# Convert `x` while dropping a leading '@' from identifier names.
# - identifier whose text starts with '@' -> the symbol without the '@'
# - dotted access (getfield with quote node) -> rebuilt with both sides processed
# - anything else -> plain `Expr(x)` conversion
function remove_at(x)
    if isidentifier(x) && valof(x) !== nothing && first(valof(x)) == '@'
        return Symbol(valof(x)[2:end])
    end
    is_getfield_w_quotenode(x) || return Expr(x)
    owner = remove_at(x.args[1])
    field = remove_at(x.args[2].args[1])
    return Expr(:., owner, QuoteNode(field))
end
# Cross-version compatibility for command literals: both plain and triple-quoted
# command nodes become a `@cmd` macrocall with `nothing` in the line-number slot.
# Versions after the 1.1 pre-release refer to the macro through
# `GlobalRef(Core, ...)`; older ones use the bare symbol.
if VERSION <= v"1.1-"
    function Expr_cmd(x)
        return Expr(:macrocall, Symbol("@cmd"), nothing, valof(x))
    end
    function Expr_tcmd(x)
        return Expr(:macrocall, Symbol("@cmd"), nothing, valof(x))
    end
else
    function Expr_cmd(x)
        return Expr(:macrocall, GlobalRef(Core, Symbol("@cmd")), nothing, valof(x))
    end
    function Expr_tcmd(x)
        return Expr(:macrocall, GlobalRef(Core, Symbol("@cmd")), nothing, valof(x))
    end
end
# Strip a leading '@' from the names in a dotted expression, in place.
#
# For `x` of the form `Expr(:., lhs, QuoteNode(name))`:
# - if `name` starts with '@', the quote node is replaced by one without it;
# - if `lhs` is a symbol starting with '@', it is replaced by the bare symbol;
#   otherwise `lhs` is processed recursively.
# Any other `x` is left untouched. Returns `x`.
function clear_at!(x)
    (x isa Expr && x.head == :.) || return x
    isempty(x.args) && return x

    if length(x.args) >= 2
        field = x.args[2]
        if field isa QuoteNode && startswith(string(field.value), "@")
            # `QuoteNode` is immutable, so its `value` cannot be reassigned;
            # build a replacement node instead.
            x.args[2] = QuoteNode(Symbol(string(field.value)[2:end]))
        end
    end

    owner = x.args[1]
    if owner isa Symbol && startswith(string(owner), "@")
        x.args[1] = Symbol(string(owner)[2:end])
    else
        clear_at!(owner)
    end
    return x
end
"""
    remlineinfo!(x)

Remove line-info entries from `x` in place and return `x`.

If `x` is an `Expr`, every argument that is a `LineNumberNode` or an
`Expr(:line, ...)` is deleted, and the remaining arguments are processed
recursively. Two shapes get extra handling:

- `:macrocall` whose second argument is not `nothing`: after the cleanup,
  `nothing` is inserted at position 2 so the location slot stays present.
- `:elseif` whose first argument is a `:block` holding a single expression
  (after cleanup): the block is replaced by that expression.

Values that are not `Expr`s are returned unchanged.
"""
function remlineinfo!(x)
    x isa Expr || return x

    isline(a) = (a isa Expr && a.head === :line) || a isa LineNumberNode

    # Decide before filtering whether the macrocall's location slot has to be
    # put back; the length check keeps a one-argument macrocall from indexing
    # out of bounds.
    restore_slot = x.head === :macrocall && length(x.args) >= 2 && x.args[2] !== nothing

    filter!(!isline, x.args)
    foreach(remlineinfo!, x.args)
    restore_slot && insert!(x.args, 2, nothing)

    # Runs after the recursion so the condition block has already lost its own
    # line node and can be recognised as a single-expression block.
    if x.head === :elseif && !isempty(x.args)
        cond = x.args[1]
        if cond isa Expr && cond.head === :block && length(cond.args) == 1
            x.args[1] = cond.args[1]
        end
    end
    return x
end