Skip to content

Commit 9c6f48f

Browse files
author
KristofferC
committed
Change the byte order to correspond to C byte order, which allows direct conversion to a pointer that can be passed to C functions
1 parent 9ef9ff2 commit 9c6f48f

File tree

2 files changed

+194
-81
lines changed

2 files changed

+194
-81
lines changed

src/InlineStrings.jl

Lines changed: 110 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -71,52 +71,60 @@ end
7171

7272
const SmallInlineStrings = Union{String1, String3, String7, String15}
7373

74-
# used to zero out n lower bytes of an inline string
75-
clear_n_bytes(s, n) = Base.shl_int(Base.lshr_int(s, 8 * n), 8 * n)
76-
_bswap(x::T) where {T <: InlineString} = Base.bswap_int(x)
7774

78-
# Byte access abstraction layer
7975
@inline get_byte(x::T, i::Int) where {T <: InlineString} =
80-
Base.trunc_int(UInt8, Base.lshr_int(x, 8 * (sizeof(T) - i)))
76+
Base.trunc_int(UInt8, Base.lshr_int(x, 8 * (i - 1)))
8177

8278
@inline function set_byte(x::T, i::Int, b::UInt8) where {T <: InlineString}
83-
old_byte = get_byte(x, i)
84-
bit_pos = 8 * (sizeof(T) - i)
85-
x = Base.xor_int(x, Base.shl_int(Base.zext_int(T, old_byte), bit_pos))
86-
return Base.or_int(x, Base.shl_int(Base.zext_int(T, b), bit_pos))
79+
bit_pos = 8 * (i - 1)
80+
mask = Base.not_int(Base.shl_int(Base.zext_int(T, 0xff), bit_pos))
81+
cleared = Base.and_int(x, mask)
82+
return Base.or_int(cleared, Base.shl_int(Base.zext_int(T, b), bit_pos))
8783
end
8884

89-
@inline get_capacity_byte(x::InlineString) = Base.trunc_int(UInt8, x)
85+
@inline get_capacity_byte(x::T) where {T <: InlineString} =
86+
Base.trunc_int(UInt8, Base.lshr_int(x, 8 * (sizeof(T) - 1)))
9087

9188
@inline function set_capacity_byte(x::T, b::UInt8) where {T <: InlineString}
92-
old_capacity = get_capacity_byte(x)
93-
cleared = Base.xor_int(x, Base.zext_int(T, old_capacity))
94-
return Base.or_int(cleared, Base.zext_int(T, b))
89+
bit_pos = 8 * (sizeof(T) - 1)
90+
mask = Base.not_int(Base.shl_int(Base.zext_int(T, 0xff), bit_pos))
91+
cleared = Base.and_int(x, mask)
92+
return Base.or_int(cleared, Base.shl_int(Base.zext_int(T, b), bit_pos))
9593
end
9694

97-
@inline clear_suffix_bytes(x::InlineString, n::Int) = clear_n_bytes(x, n)
95+
@inline function clear_suffix_bytes(x::T, n::Int) where {T <: InlineString}
96+
n == 0 && return x
97+
n >= sizeof(T) && return create_with_length(T, 0)
98+
result = create_with_length(T, 0)
99+
keep_bytes = sizeof(T) - n
100+
for i in 1:keep_bytes
101+
result = set_byte(result, i, get_byte(x, i))
102+
end
103+
return result
104+
end
98105

99106
@inline function clear_prefix_bytes(x::T, n::Int) where {T <: InlineString}
107+
n == 0 && return x
100108
capacity = get_capacity_byte(x)
101-
without_capacity = Base.xor_int(x, Base.zext_int(T, capacity))
102-
shifted = Base.shl_int(without_capacity, 8 * n)
103-
return Base.or_int(shifted, Base.zext_int(T, capacity))
109+
data_only = Base.and_int(x, Base.not_int(Base.shl_int(Base.zext_int(T, 0xff), 8 * (sizeof(T) - 1))))
110+
shifted_data = Base.lshr_int(data_only, 8 * n)
111+
return set_capacity_byte(shifted_data, capacity)
104112
end
105113

106-
@inline create_with_length(::Type{T}, length::Int) where {T <: InlineString} =
107-
Base.zext_int(T, trailing_byte(T, length))
114+
@inline function create_with_length(::Type{T}, length::Int) where {T <: InlineString}
115+
capacity_byte = trailing_byte(T, length)
116+
return Base.shl_int(Base.zext_int(T, capacity_byte), 8 * (sizeof(T) - 1))
117+
end
108118

109-
@inline get_string_data(x::InlineString) = Base.lshr_int(x, 8)
119+
@inline function get_string_data(x::T) where {T <: InlineString}
120+
capacity_mask = Base.shl_int(Base.zext_int(T, 0xff), 8 * (sizeof(T) - 1))
121+
return Base.and_int(x, Base.not_int(capacity_mask))
122+
end
110123

111124
@inline function resize_string_data(x::S, ::Type{T}) where {S <: InlineString, T <: InlineString}
112125
sizeof(T) == sizeof(S) && return x
113-
if sizeof(T) > sizeof(S)
114-
data = get_string_data(x)
115-
return Base.shl_int(Base.zext_int(T, data), 8 * (sizeof(T) - sizeof(S) + 1))
116-
else
117-
shift = 8 * (sizeof(S) - sizeof(T))
118-
return Base.trunc_int(T, Base.lshr_int(x, shift))
119-
end
126+
data = get_string_data(x)
127+
return sizeof(T) > sizeof(S) ? Base.zext_int(T, data) : Base.trunc_int(T, data)
120128
end
121129

122130
const InlineStringTypes = Union{InlineString1,
@@ -170,7 +178,7 @@ end
170178
function Base.String(x::T) where {T <: InlineString}
171179
len = ncodeunits(x)
172180
out = Base._string_n(len)
173-
ref = Ref{T}(_bswap(x))
181+
ref = Ref{T}(x)
174182
GC.@preserve ref out begin
175183
ptr = convert(Ptr{UInt8}, Base.unsafe_convert(Ptr{T}, ref))
176184
unsafe_copyto!(pointer(out), ptr, len)
@@ -179,17 +187,17 @@ function Base.String(x::T) where {T <: InlineString}
179187
end
180188

181189
function Base.Symbol(x::T) where {T <: InlineString}
182-
ref = Ref{T}(_bswap(x))
190+
ref = Ref{T}(x)
183191
return ccall(:jl_symbol_n, Ref{Symbol},
184192
(Ref{T}, Int), ref, sizeof(x))
185193
end
186194

187195
Base.cconvert(::Type{Ptr{UInt8}}, x::T) where {T <: InlineString} =
188-
Ref{T}(_bswap(x))
196+
Ref{T}(x)
189197
Base.cconvert(::Type{Ptr{Int8}}, x::T) where {T <: InlineString} =
190-
Ref{T}(_bswap(x))
198+
Ref{T}(x)
191199
function Base.cconvert(::Type{Cstring}, x::T) where {T <: InlineString}
192-
ref = Ref{T}(_bswap(x))
200+
ref = Ref{T}(x)
193201
Base.containsnul(Ptr{Int8}(pointer_from_objref(ref)), sizeof(x)) &&
194202
throw(ArgumentError("embedded NULs are not allowed in C strings: $x"))
195203
return ref
@@ -234,9 +242,8 @@ for T in (:InlineString1, :InlineString3, :InlineString7, :InlineString15, :Inli
234242
len = sizeof(x)
235243
len < sizeof($T) || stringtoolong($T, len)
236244
y = GC.@preserve x unsafe_load(convert(Ptr{$T}, pointer(x)))
237-
sz = 8 * (sizeof($T) - len)
238245
# Clear unused bytes and set capacity byte
239-
cleared = Base.shl_int(Base.lshr_int(_bswap(y), sz), sz)
246+
cleared = clear_suffix_bytes(y, sizeof($T) - len)
240247
return set_capacity_byte(cleared, trailing_byte($T, len))
241248
else
242249
len = ncodeunits(x)
@@ -265,9 +272,8 @@ for T in (:InlineString1, :InlineString3, :InlineString7, :InlineString15, :Inli
265272
return y
266273
else
267274
y = GC.@preserve buf unsafe_load(convert(Ptr{$T}, pointer(buf, pos)))
268-
sz = 8 * (sizeof($T) - len)
269275
# Clear unused bytes and set capacity byte
270-
cleared = Base.shl_int(Base.lshr_int(_bswap(y), sz), sz)
276+
cleared = clear_suffix_bytes(y, sizeof($T) - len)
271277
return set_capacity_byte(cleared, trailing_byte($T, len))
272278
end
273279
end
@@ -339,22 +345,37 @@ end
339345
Base.:(==)(x::T, y::T) where {T <: InlineString} = Base.eq_int(x, y)
340346
function Base.:(==)(x::String, y::T) where {T <: InlineString}
341347
sizeof(x) == sizeof(y) || return false
342-
ref = Ref{T}(_bswap(y))
348+
ref = Ref{T}(y)
343349
GC.@preserve x begin
344350
return ccall(:memcmp, Cint, (Ptr{UInt8}, Ref{T}, Csize_t),
345351
pointer(x), ref, sizeof(x)) == 0
346352
end
347353
end
348354
Base.:(==)(y::InlineString, x::String) = x == y
349355

350-
Base.cmp(a::T, b::T) where {T <: InlineString} =
351-
Base.eq_int(a, b) ? 0 : Base.ult_int(a, b) ? -1 : 1
356+
function Base.cmp(a::T, b::T) where {T <: InlineString}
357+
Base.eq_int(a, b) && return 0
358+
359+
len_a = ncodeunits(a)
360+
len_b = ncodeunits(b)
361+
min_len = min(len_a, len_b)
362+
363+
for i in 1:min_len
364+
byte_a = get_byte(a, i)
365+
byte_b = get_byte(b, i)
366+
if byte_a != byte_b
367+
return byte_a < byte_b ? -1 : 1
368+
end
369+
end
370+
371+
return len_a < len_b ? -1 : (len_a > len_b ? 1 : 0)
372+
end
352373

353374
@static if isdefined(Base, :hash_bytes)
354375

355376
function Base.hash(x::T, h::UInt) where {T <: InlineString}
356377
len = ncodeunits(x)
357-
ref = Ref{T}(_bswap(x))
378+
ref = Ref{T}(x)
358379
GC.@preserve ref begin
359380
ptr = convert(Ptr{UInt8}, Base.unsafe_convert(Ptr{T}, ref))
360381
return Base.hash_bytes(ptr, len, UInt64(h), Base.HASH_SECRET) % UInt
@@ -365,7 +386,7 @@ else
365386

366387
function Base.hash(x::T, h::UInt) where {T <: InlineString}
367388
h += Base.memhash_seed
368-
ref = Ref{T}(_bswap(x))
389+
ref = Ref{T}(x)
369390
return ccall(Base.memhash, UInt,
370391
(Ref{T}, Csize_t, UInt32),
371392
ref, sizeof(x), h % UInt32) + h
@@ -395,7 +416,7 @@ function Base.read(s::IO, ::Type{T}) where {T <: InlineString}
395416
end
396417

397418
function Base.print(io::IO, x::T) where {T <: InlineString}
398-
ref = Ref{T}(_bswap(x))
419+
ref = Ref{T}(x)
399420
return GC.@preserve ref begin
400421
ptr = convert(Ptr{UInt8}, Base.unsafe_convert(Ptr{T}, ref))
401422
unsafe_write(io, ptr, sizeof(x))
@@ -405,14 +426,9 @@ end
405426

406427
function Base.isascii(x::T) where {T <: InlineString}
407428
len = ncodeunits(x)
408-
x = Base.lshr_int(x, 8 * (sizeof(T) - len))
409-
for _ = 1:(len >> 2)
410-
y = Base.trunc_int(UInt32, x)
411-
(y & 0xff000000) >= 0x80000000 && return false
412-
(y & 0x00ff0000) >= 0x00800000 && return false
413-
(y & 0x0000ff00) >= 0x00008000 && return false
414-
(y & 0x000000ff) >= 0x00000080 && return false
415-
x = Base.lshr_int(x, 32)
429+
for i in 1:len
430+
byte_val = get_byte(x, i)
431+
byte_val >= 0x80 && return false
416432
end
417433
return true
418434
end
@@ -579,31 +595,34 @@ end
579595
Base.reverse(x::String1) = x
580596
function Base.reverse(s::T) where {T <: InlineString}
581597
nc = ncodeunits(s)
598+
nc <= 1 && return s
599+
600+
result = create_with_length(T, nc)
601+
582602
if isascii(s)
583-
len = Base.zext_int(T, get_capacity_byte(s))
584-
x = Base.or_int(Base.shl_int(_bswap(s), 8 * (sizeof(T) - nc)), len)
585-
return x
586-
end
587-
x = Base.zext_int(T, Base.trunc_int(UInt8, s))
588-
i = 1
589-
while i <= nc
590-
j = nextind(s, i)
591-
_x = Base.lshr_int(s, 8 * (sizeof(T) - (j - 1)))
592-
n = j - i
593-
_x = Base.and_int(_x, n == 1 ? Base.zext_int(T, 0xff) :
594-
n == 2 ? Base.zext_int(T, 0xffff) :
595-
n == 3 ? Base.zext_int(T, 0xffffff) :
596-
Base.zext_int(T, 0xffffffff))
597-
_x = Base.shl_int(_x, 8 * (sizeof(T) - (nc - (i - 1))))
598-
x = Base.or_int(x, _x)
599-
i = j
603+
for i in 1:nc
604+
result = set_byte(result, nc - i + 1, get_byte(s, i))
605+
end
606+
else
607+
dest_offs = nc + 1
608+
src_pos = 1
609+
610+
for c in s
611+
char_len = ncodeunits(c)
612+
dest_offs -= char_len
613+
for i in 1:char_len
614+
result = set_byte(result, dest_offs + i - 1, get_byte(s, src_pos + i - 1))
615+
end
616+
src_pos += char_len
617+
end
600618
end
601-
return x
619+
620+
return result
602621
end
603622

604623
@inline function Base.__unsafe_string!(out, x::T, offs::Integer) where {T <: InlineString}
605624
n = sizeof(x)
606-
ref = Ref{T}(_bswap(x))
625+
ref = Ref{T}(x)
607626
GC.@preserve ref out begin
608627
ptr = convert(Ptr{UInt8}, Base.unsafe_convert(Ptr{T}, ref))
609628
unsafe_copyto!(pointer(out, offs), ptr, n)
@@ -645,11 +664,24 @@ function _string(a::Ta, b::Tb) where {Ta <: SmallInlineStrings, Tb <: SmallInlin
645664
T = summed_type(Ta, Tb)
646665
len_a = sizeof(a)
647666
len_b = sizeof(b)
648-
# Remove length byte (lshr), grow to new size (zext), move chars forward (shl).
649-
a2 = Base.shl_int(Base.zext_int(T, Base.lshr_int(a, 8)), 8 * (sizeof(T) - sizeof(Ta) + 1))
650-
b2 = Base.shl_int(Base.zext_int(T, Base.lshr_int(b, 8)), 8 * (sizeof(T) - sizeof(Tb) + 1 - len_a))
651-
lb = _oftype(T, trailing_byte(T, len_a + len_b)) # new length byte
652-
return Base.or_int(Base.or_int(a2, b2), lb)
667+
total_len = len_a + len_b
668+
669+
# Create result with correct capacity
670+
result = create_with_length(T, total_len)
671+
672+
# Copy bytes from first string
673+
for i in 1:len_a
674+
byte_val = get_byte(a, i)
675+
result = set_byte(result, i, byte_val)
676+
end
677+
678+
# Copy bytes from second string
679+
for i in 1:len_b
680+
byte_val = get_byte(b, i)
681+
result = set_byte(result, len_a + i, byte_val)
682+
end
683+
684+
return result
653685
end
654686

655687
summed_type(::Type{InlineString1}, ::Type{InlineString1}) = InlineString3
@@ -675,7 +707,7 @@ function Base.repeat(x::T, r::Integer) where {T <: InlineString}
675707
ccall(:memset, Ptr{Cvoid}, (Ptr{UInt8}, Cint, Csize_t), out, b, r)
676708
else
677709
for i = 0:r-1
678-
ref = Ref{T}(_bswap(x))
710+
ref = Ref{T}(x)
679711
GC.@preserve ref out begin
680712
ptr = convert(Ptr{UInt8}, Base.unsafe_convert(Ptr{T}, ref))
681713
unsafe_copyto!(pointer(out, i * n + 1), ptr, n)
@@ -691,7 +723,7 @@ Base.startswith(a::InlineString, b::InlineString) = invoke(startswith, Tuple{Abs
691723
function Base.startswith(a::T, b::Union{String, SubString{String}}) where {T <: InlineString}
692724
cub = ncodeunits(b)
693725
ncodeunits(a) < cub && return false
694-
ref = Ref{T}(_bswap(a))
726+
ref = Ref{T}(a)
695727
return GC.@preserve ref begin
696728
ptr = convert(Ptr{UInt8}, Base.unsafe_convert(Ptr{T}, ref))
697729
if Base._memcmp(ptr, b, sizeof(b)) == 0
@@ -708,7 +740,7 @@ function Base.endswith(a::T, b::Union{String, SubString{String}}) where {T <: In
708740
cub = ncodeunits(b)
709741
astart = ncodeunits(a) - ncodeunits(b) + 1
710742
astart < 1 && return false
711-
ref = Ref{T}(_bswap(a))
743+
ref = Ref{T}(a)
712744
return GC.@preserve ref begin
713745
ptr = convert(Ptr{UInt8}, Base.unsafe_convert(Ptr{T}, ref))
714746
if Base._memcmp(ptr + (astart - 1), b, sizeof(b)) == 0
@@ -902,6 +934,7 @@ sortvalue(o::Perm, i::Int) = sortvalue(o.order, o.data[i])
902934
sortvalue(o::Lt, x ) = error("sortvalue does not work with general Lt Orderings")
903935
sortvalue(rev::ReverseOrdering, x) = Base.not_int(sortvalue(rev.fwd, x))
904936
sortvalue(::Base.ForwardOrdering, x) = x
937+
sortvalue(::Base.ForwardOrdering, x::InlineString) = Base.bswap_int(get_string_data(x))
905938

906939
_oftype(::Type{T}, x::S) where {T, S} = sizeof(T) == sizeof(S) ? Base.bitcast(T, x) : sizeof(T) > sizeof(S) ? Base.zext_int(T, x) : Base.trunc_int(T, x)
907940

0 commit comments

Comments
 (0)