7171
7272const SmallInlineStrings = Union{String1, String3, String7, String15}
7373
74- # used to zero out n lower bytes of an inline string
75- clear_n_bytes (s, n) = Base. shl_int (Base. lshr_int (s, 8 * n), 8 * n)
76- _bswap (x:: T ) where {T <: InlineString } = Base. bswap_int (x)
7774
78- # Byte access abstraction layer
7975@inline get_byte (x:: T , i:: Int ) where {T <: InlineString } =
80- Base. trunc_int (UInt8, Base. lshr_int (x, 8 * (sizeof (T) - i )))
76+ Base. trunc_int (UInt8, Base. lshr_int (x, 8 * (i - 1 )))
8177
8278@inline function set_byte (x:: T , i:: Int , b:: UInt8 ) where {T <: InlineString }
83- old_byte = get_byte (x, i )
84- bit_pos = 8 * ( sizeof (T) - i )
85- x = Base. xor_int (x, Base . shl_int (Base . zext_int (T, old_byte), bit_pos) )
86- return Base. or_int (x , Base. shl_int (Base. zext_int (T, b), bit_pos))
79+ bit_pos = 8 * (i - 1 )
80+ mask = Base . not_int (Base . shl_int (Base . zext_int (T, 0xff ), bit_pos) )
81+ cleared = Base. and_int (x, mask )
82+ return Base. or_int (cleared , Base. shl_int (Base. zext_int (T, b), bit_pos))
8783end
8884
89- @inline get_capacity_byte (x:: InlineString ) = Base. trunc_int (UInt8, x)
85+ @inline get_capacity_byte (x:: T ) where {T <: InlineString } =
86+ Base. trunc_int (UInt8, Base. lshr_int (x, 8 * (sizeof (T) - 1 )))
9087
9188@inline function set_capacity_byte (x:: T , b:: UInt8 ) where {T <: InlineString }
92- old_capacity = get_capacity_byte (x)
93- cleared = Base. xor_int (x, Base. zext_int (T, old_capacity))
94- return Base. or_int (cleared, Base. zext_int (T, b))
89+ bit_pos = 8 * (sizeof (T) - 1 )
90+ mask = Base. not_int (Base. shl_int (Base. zext_int (T, 0xff ), bit_pos))
91+ cleared = Base. and_int (x, mask)
92+ return Base. or_int (cleared, Base. shl_int (Base. zext_int (T, b), bit_pos))
9593end
9694
97- @inline clear_suffix_bytes (x:: InlineString , n:: Int ) = clear_n_bytes (x, n)
95+ @inline function clear_suffix_bytes (x:: T , n:: Int ) where {T <: InlineString }
96+ n == 0 && return x
97+ n >= sizeof (T) && return create_with_length (T, 0 )
98+ result = create_with_length (T, 0 )
99+ keep_bytes = sizeof (T) - n
100+ for i in 1 : keep_bytes
101+ result = set_byte (result, i, get_byte (x, i))
102+ end
103+ return result
104+ end
98105
99106@inline function clear_prefix_bytes (x:: T , n:: Int ) where {T <: InlineString }
107+ n == 0 && return x
100108 capacity = get_capacity_byte (x)
101- without_capacity = Base. xor_int (x, Base. zext_int (T, capacity ))
102- shifted = Base. shl_int (without_capacity , 8 * n)
103- return Base . or_int (shifted, Base . zext_int (T, capacity) )
109+ data_only = Base. and_int (x, Base. not_int (Base . shl_int (Base . zext_int (T, 0xff ), 8 * ( sizeof (T) - 1 )) ))
110+ shifted_data = Base. lshr_int (data_only , 8 * n)
111+ return set_capacity_byte (shifted_data, capacity)
104112end
105113
106- @inline create_with_length (:: Type{T} , length:: Int ) where {T <: InlineString } =
107- Base. zext_int (T, trailing_byte (T, length))
114+ @inline function create_with_length (:: Type{T} , length:: Int ) where {T <: InlineString }
115+ capacity_byte = trailing_byte (T, length)
116+ return Base. shl_int (Base. zext_int (T, capacity_byte), 8 * (sizeof (T) - 1 ))
117+ end
108118
109- @inline get_string_data (x:: InlineString ) = Base. lshr_int (x, 8 )
119+ @inline function get_string_data (x:: T ) where {T <: InlineString }
120+ capacity_mask = Base. shl_int (Base. zext_int (T, 0xff ), 8 * (sizeof (T) - 1 ))
121+ return Base. and_int (x, Base. not_int (capacity_mask))
122+ end
110123
111124@inline function resize_string_data (x:: S , :: Type{T} ) where {S <: InlineString , T <: InlineString }
112125 sizeof (T) == sizeof (S) && return x
113- if sizeof (T) > sizeof (S)
114- data = get_string_data (x)
115- return Base. shl_int (Base. zext_int (T, data), 8 * (sizeof (T) - sizeof (S) + 1 ))
116- else
117- shift = 8 * (sizeof (S) - sizeof (T))
118- return Base. trunc_int (T, Base. lshr_int (x, shift))
119- end
126+ data = get_string_data (x)
127+ return sizeof (T) > sizeof (S) ? Base. zext_int (T, data) : Base. trunc_int (T, data)
120128end
121129
122130const InlineStringTypes = Union{InlineString1,
170178function Base. String (x:: T ) where {T <: InlineString }
171179 len = ncodeunits (x)
172180 out = Base. _string_n (len)
173- ref = Ref {T} (_bswap (x) )
181+ ref = Ref {T} (x )
174182 GC. @preserve ref out begin
175183 ptr = convert (Ptr{UInt8}, Base. unsafe_convert (Ptr{T}, ref))
176184 unsafe_copyto! (pointer (out), ptr, len)
@@ -179,17 +187,17 @@ function Base.String(x::T) where {T <: InlineString}
179187end
180188
181189function Base. Symbol (x:: T ) where {T <: InlineString }
182- ref = Ref {T} (_bswap (x) )
190+ ref = Ref {T} (x )
183191 return ccall (:jl_symbol_n , Ref{Symbol},
184192 (Ref{T}, Int), ref, sizeof (x))
185193end
186194
187195Base. cconvert (:: Type{Ptr{UInt8}} , x:: T ) where {T <: InlineString } =
188- Ref {T} (_bswap (x) )
196+ Ref {T} (x )
189197Base. cconvert (:: Type{Ptr{Int8}} , x:: T ) where {T <: InlineString } =
190- Ref {T} (_bswap (x) )
198+ Ref {T} (x )
191199function Base. cconvert (:: Type{Cstring} , x:: T ) where {T <: InlineString }
192- ref = Ref {T} (_bswap (x) )
200+ ref = Ref {T} (x )
193201 Base. containsnul (Ptr {Int8} (pointer_from_objref (ref)), sizeof (x)) &&
194202 throw (ArgumentError (" embedded NULs are not allowed in C strings: $x " ))
195203 return ref
@@ -234,9 +242,8 @@ for T in (:InlineString1, :InlineString3, :InlineString7, :InlineString15, :Inli
234242 len = sizeof (x)
235243 len < sizeof ($ T) || stringtoolong ($ T, len)
236244 y = GC. @preserve x unsafe_load (convert (Ptr{$ T}, pointer (x)))
237- sz = 8 * (sizeof ($ T) - len)
238245 # Clear unused bytes and set capacity byte
239- cleared = Base . shl_int (Base . lshr_int ( _bswap (y), sz), sz )
246+ cleared = clear_suffix_bytes (y, sizeof ( $ T) - len )
240247 return set_capacity_byte (cleared, trailing_byte ($ T, len))
241248 else
242249 len = ncodeunits (x)
@@ -265,9 +272,8 @@ for T in (:InlineString1, :InlineString3, :InlineString7, :InlineString15, :Inli
265272 return y
266273 else
267274 y = GC. @preserve buf unsafe_load (convert (Ptr{$ T}, pointer (buf, pos)))
268- sz = 8 * (sizeof ($ T) - len)
269275 # Clear unused bytes and set capacity byte
270- cleared = Base . shl_int (Base . lshr_int ( _bswap (y), sz), sz )
276+ cleared = clear_suffix_bytes (y, sizeof ( $ T) - len )
271277 return set_capacity_byte (cleared, trailing_byte ($ T, len))
272278 end
273279 end
@@ -339,22 +345,37 @@ end
339345Base.:(== )(x:: T , y:: T ) where {T <: InlineString } = Base. eq_int (x, y)
340346function Base.:(== )(x:: String , y:: T ) where {T <: InlineString }
341347 sizeof (x) == sizeof (y) || return false
342- ref = Ref {T} (_bswap (y) )
348+ ref = Ref {T} (y )
343349 GC. @preserve x begin
344350 return ccall (:memcmp , Cint, (Ptr{UInt8}, Ref{T}, Csize_t),
345351 pointer (x), ref, sizeof (x)) == 0
346352 end
347353end
348354Base.:(== )(y:: InlineString , x:: String ) = x == y
349355
350- Base. cmp (a:: T , b:: T ) where {T <: InlineString } =
351- Base. eq_int (a, b) ? 0 : Base. ult_int (a, b) ? - 1 : 1
356+ function Base. cmp (a:: T , b:: T ) where {T <: InlineString }
357+ Base. eq_int (a, b) && return 0
358+
359+ len_a = ncodeunits (a)
360+ len_b = ncodeunits (b)
361+ min_len = min (len_a, len_b)
362+
363+ for i in 1 : min_len
364+ byte_a = get_byte (a, i)
365+ byte_b = get_byte (b, i)
366+ if byte_a != byte_b
367+ return byte_a < byte_b ? - 1 : 1
368+ end
369+ end
370+
371+ return len_a < len_b ? - 1 : (len_a > len_b ? 1 : 0 )
372+ end
352373
353374@static if isdefined (Base, :hash_bytes )
354375
355376function Base. hash (x:: T , h:: UInt ) where {T <: InlineString }
356377 len = ncodeunits (x)
357- ref = Ref {T} (_bswap (x) )
378+ ref = Ref {T} (x )
358379 GC. @preserve ref begin
359380 ptr = convert (Ptr{UInt8}, Base. unsafe_convert (Ptr{T}, ref))
360381 return Base. hash_bytes (ptr, len, UInt64 (h), Base. HASH_SECRET) % UInt
365386
366387function Base. hash (x:: T , h:: UInt ) where {T <: InlineString }
367388 h += Base. memhash_seed
368- ref = Ref {T} (_bswap (x) )
389+ ref = Ref {T} (x )
369390 return ccall (Base. memhash, UInt,
370391 (Ref{T}, Csize_t, UInt32),
371392 ref, sizeof (x), h % UInt32) + h
@@ -395,7 +416,7 @@ function Base.read(s::IO, ::Type{T}) where {T <: InlineString}
395416end
396417
397418function Base. print (io:: IO , x:: T ) where {T <: InlineString }
398- ref = Ref {T} (_bswap (x) )
419+ ref = Ref {T} (x )
399420 return GC. @preserve ref begin
400421 ptr = convert (Ptr{UInt8}, Base. unsafe_convert (Ptr{T}, ref))
401422 unsafe_write (io, ptr, sizeof (x))
405426
406427function Base. isascii (x:: T ) where {T <: InlineString }
407428 len = ncodeunits (x)
408- x = Base. lshr_int (x, 8 * (sizeof (T) - len))
409- for _ = 1 : (len >> 2 )
410- y = Base. trunc_int (UInt32, x)
411- (y & 0xff000000 ) >= 0x80000000 && return false
412- (y & 0x00ff0000 ) >= 0x00800000 && return false
413- (y & 0x0000ff00 ) >= 0x00008000 && return false
414- (y & 0x000000ff ) >= 0x00000080 && return false
415- x = Base. lshr_int (x, 32 )
429+ for i in 1 : len
430+ byte_val = get_byte (x, i)
431+ byte_val >= 0x80 && return false
416432 end
417433 return true
418434end
@@ -579,31 +595,34 @@ end
579595Base. reverse (x:: String1 ) = x
580596function Base. reverse (s:: T ) where {T <: InlineString }
581597 nc = ncodeunits (s)
598+ nc <= 1 && return s
599+
600+ result = create_with_length (T, nc)
601+
582602 if isascii (s)
583- len = Base. zext_int (T, get_capacity_byte (s))
584- x = Base. or_int (Base. shl_int (_bswap (s), 8 * (sizeof (T) - nc)), len)
585- return x
586- end
587- x = Base. zext_int (T, Base. trunc_int (UInt8, s))
588- i = 1
589- while i <= nc
590- j = nextind (s, i)
591- _x = Base. lshr_int (s, 8 * (sizeof (T) - (j - 1 )))
592- n = j - i
593- _x = Base. and_int (_x, n == 1 ? Base. zext_int (T, 0xff ) :
594- n == 2 ? Base. zext_int (T, 0xffff ) :
595- n == 3 ? Base. zext_int (T, 0xffffff ) :
596- Base. zext_int (T, 0xffffffff ))
597- _x = Base. shl_int (_x, 8 * (sizeof (T) - (nc - (i - 1 ))))
598- x = Base. or_int (x, _x)
599- i = j
603+ for i in 1 : nc
604+ result = set_byte (result, nc - i + 1 , get_byte (s, i))
605+ end
606+ else
607+ dest_offs = nc + 1
608+ src_pos = 1
609+
610+ for c in s
611+ char_len = ncodeunits (c)
612+ dest_offs -= char_len
613+ for i in 1 : char_len
614+ result = set_byte (result, dest_offs + i - 1 , get_byte (s, src_pos + i - 1 ))
615+ end
616+ src_pos += char_len
617+ end
600618 end
601- return x
619+
620+ return result
602621end
603622
604623@inline function Base. __unsafe_string! (out, x:: T , offs:: Integer ) where {T <: InlineString }
605624 n = sizeof (x)
606- ref = Ref {T} (_bswap (x) )
625+ ref = Ref {T} (x )
607626 GC. @preserve ref out begin
608627 ptr = convert (Ptr{UInt8}, Base. unsafe_convert (Ptr{T}, ref))
609628 unsafe_copyto! (pointer (out, offs), ptr, n)
@@ -645,11 +664,24 @@ function _string(a::Ta, b::Tb) where {Ta <: SmallInlineStrings, Tb <: SmallInlin
645664 T = summed_type (Ta, Tb)
646665 len_a = sizeof (a)
647666 len_b = sizeof (b)
648- # Remove length byte (lshr), grow to new size (zext), move chars forward (shl).
649- a2 = Base. shl_int (Base. zext_int (T, Base. lshr_int (a, 8 )), 8 * (sizeof (T) - sizeof (Ta) + 1 ))
650- b2 = Base. shl_int (Base. zext_int (T, Base. lshr_int (b, 8 )), 8 * (sizeof (T) - sizeof (Tb) + 1 - len_a))
651- lb = _oftype (T, trailing_byte (T, len_a + len_b)) # new length byte
652- return Base. or_int (Base. or_int (a2, b2), lb)
667+ total_len = len_a + len_b
668+
669+ # Create result with correct capacity
670+ result = create_with_length (T, total_len)
671+
672+ # Copy bytes from first string
673+ for i in 1 : len_a
674+ byte_val = get_byte (a, i)
675+ result = set_byte (result, i, byte_val)
676+ end
677+
678+ # Copy bytes from second string
679+ for i in 1 : len_b
680+ byte_val = get_byte (b, i)
681+ result = set_byte (result, len_a + i, byte_val)
682+ end
683+
684+ return result
653685end
654686
655687summed_type (:: Type{InlineString1} , :: Type{InlineString1} ) = InlineString3
@@ -675,7 +707,7 @@ function Base.repeat(x::T, r::Integer) where {T <: InlineString}
675707 ccall (:memset , Ptr{Cvoid}, (Ptr{UInt8}, Cint, Csize_t), out, b, r)
676708 else
677709 for i = 0 : r- 1
678- ref = Ref {T} (_bswap (x) )
710+ ref = Ref {T} (x )
679711 GC. @preserve ref out begin
680712 ptr = convert (Ptr{UInt8}, Base. unsafe_convert (Ptr{T}, ref))
681713 unsafe_copyto! (pointer (out, i * n + 1 ), ptr, n)
@@ -691,7 +723,7 @@ Base.startswith(a::InlineString, b::InlineString) = invoke(startswith, Tuple{Abs
691723function Base. startswith (a:: T , b:: Union{String, SubString{String}} ) where {T <: InlineString }
692724 cub = ncodeunits (b)
693725 ncodeunits (a) < cub && return false
694- ref = Ref {T} (_bswap (a) )
726+ ref = Ref {T} (a )
695727 return GC. @preserve ref begin
696728 ptr = convert (Ptr{UInt8}, Base. unsafe_convert (Ptr{T}, ref))
697729 if Base. _memcmp (ptr, b, sizeof (b)) == 0
@@ -708,7 +740,7 @@ function Base.endswith(a::T, b::Union{String, SubString{String}}) where {T <: In
708740 cub = ncodeunits (b)
709741 astart = ncodeunits (a) - ncodeunits (b) + 1
710742 astart < 1 && return false
711- ref = Ref {T} (_bswap (a) )
743+ ref = Ref {T} (a )
712744 return GC. @preserve ref begin
713745 ptr = convert (Ptr{UInt8}, Base. unsafe_convert (Ptr{T}, ref))
714746 if Base. _memcmp (ptr + (astart - 1 ), b, sizeof (b)) == 0
@@ -902,6 +934,7 @@ sortvalue(o::Perm, i::Int) = sortvalue(o.order, o.data[i])
902934sortvalue (o:: Lt , x ) = error (" sortvalue does not work with general Lt Orderings" )
903935sortvalue (rev:: ReverseOrdering , x) = Base. not_int (sortvalue (rev. fwd, x))
904936sortvalue (:: Base.ForwardOrdering , x) = x
937+ sortvalue (:: Base.ForwardOrdering , x:: InlineString ) = Base. bswap_int (get_string_data (x))
905938
906939_oftype (:: Type{T} , x:: S ) where {T, S} = sizeof (T) == sizeof (S) ? Base. bitcast (T, x) : sizeof (T) > sizeof (S) ? Base. zext_int (T, x) : Base. trunc_int (T, x)
907940
0 commit comments