From 6b1cc86c32a5c745c9832cabd6a3b5c002540a06 Mon Sep 17 00:00:00 2001 From: Stefan Karpinski Date: Wed, 22 Nov 2017 14:22:27 +0000 Subject: [PATCH 1/4] reformat some long lines in REPL completions --- base/repl/REPLCompletions.jl | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/base/repl/REPLCompletions.jl b/base/repl/REPLCompletions.jl index 5523ab544ca3d..2040476af8947 100644 --- a/base/repl/REPLCompletions.jl +++ b/base/repl/REPLCompletions.jl @@ -245,11 +245,14 @@ function find_start_brace(s::AbstractString; c_start='(', c_end=')') in_back_ticks = true end else - if !in_back_ticks && !in_double_quotes && c == '\'' && !done(r, i) && next(r, i)[1]!='\\' + if !in_back_ticks && !in_double_quotes && + c == '\'' && !done(r, i) && next(r, i)[1] != '\\' in_single_quotes = !in_single_quotes - elseif !in_back_ticks && !in_single_quotes && c == '"' && !done(r, i) && next(r, i)[1]!='\\' + elseif !in_back_ticks && !in_single_quotes && + c == '"' && !done(r, i) && next(r, i)[1] != '\\' in_double_quotes = !in_double_quotes - elseif !in_single_quotes && !in_double_quotes && c == '`' && !done(r, i) && next(r, i)[1]!='\\' + elseif !in_single_quotes && !in_double_quotes && + c == '`' && !done(r, i) && next(r, i)[1] != '\\' in_back_ticks = !in_back_ticks end end From 05f8e42c823a838279f569b4667b804d6644a3cc Mon Sep 17 00:00:00 2001 From: Stefan Karpinski Date: Thu, 16 Nov 2017 12:16:43 -0500 Subject: [PATCH 2/4] ncodeunits(s::AbstractString) gives number of code units --- NEWS.md | 4 ++++ base/exports.jl | 1 + base/strings/basic.jl | 4 ++++ base/strings/string.jl | 14 ++++++++++++++ test/strings/basic.jl | 8 ++++++++ 5 files changed, 31 insertions(+) diff --git a/NEWS.md b/NEWS.md index c280cc864979d..8ad8c403bf1d4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -409,6 +409,10 @@ Library improvements * The `keys` of an `Associative` are now an `AbstractSet`. 
`Base.KeyIterator{<:Associative}` has been changed to `KeySet{K, <:Associative{K}} <: AbstractSet{K}` ([#24580]). + * New function `ncodeunits(s::AbstractString)` gives the number of code units in a string. + The generic definition is constant time but calls `endof(s)` which may be inefficient. + Therefore custom string types may want to define direct `ncodeunits` methods. + Compiler/Runtime improvements ----------------------------- diff --git a/base/exports.jl b/base/exports.jl index 31af9f6ce7813..019babfb65f98 100644 --- a/base/exports.jl +++ b/base/exports.jl @@ -756,6 +756,7 @@ export lstrip, match, matchall, + ncodeunits, ndigits, nextind, normalize_string, diff --git a/base/strings/basic.jl b/base/strings/basic.jl index cd7ea3cd8ea98..45271839f868a 100644 --- a/base/strings/basic.jl +++ b/base/strings/basic.jl @@ -69,6 +69,10 @@ julia> 'j' * "ulia" one(::Union{T,Type{T}}) where {T<:AbstractString} = convert(T, "") +# generic number of code units; implementations generally know how long a string +# is though and should override this with a more efficient method +ncodeunits(s::AbstractString) = nextind(s, endof(s)) - 1 + """ length(s::AbstractString) diff --git a/base/strings/string.jl b/base/strings/string.jl index c5c998f7f4419..e81ae90c7bd10 100644 --- a/base/strings/string.jl +++ b/base/strings/string.jl @@ -87,6 +87,20 @@ codeunit(s::AbstractString, i::Integer) @gc_preserve s unsafe_load(pointer(s, i)) end +""" + ncodeunits(s::AbstractString) + +The number of code units in a string. For example, for UTF-8-like data such as +the default `String` type, the number of code units is the number of bytes in +the string, a.k.a. `sizeof(s)`. For a UTF-16 encoded string type, however, the +code unit is `UInt16` so the number of code units is the number of `UInt16` +words in the representation of the string. The expression `codeunit(s, i)` is +valid and safe for precisely the range of `i` values `1:ncodeunits(s)`. + +See also: [`codeunit`](@ref). 
+""" +ncodeunits(s::String) = sizeof(s) + write(io::IO, s::String) = @gc_preserve s unsafe_write(io, pointer(s), reinterpret(UInt, sizeof(s))) diff --git a/test/strings/basic.jl b/test/strings/basic.jl index cac55ddae374a..423d5519d06e4 100644 --- a/test/strings/basic.jl +++ b/test/strings/basic.jl @@ -696,3 +696,11 @@ end @test String(take!(b)) == "UnicodeError: invalid character index 2 (0xba is a continuation byte)" end +@testset "ncodeunits" begin + for (s, n) in ["" => 0, "a" => 1, "abc" => 3, + "α" => 2, "abγ" => 4, "∀" => 3, + "∀x∃y" => 8, "🍕" => 4, "🍕∀" => 7] + @test ncodeunits(s) == n + @test ncodeunits(GenericString(s)) == n + end +end From d887fb62b9034a08714f68185cca9bcd8e51a4d3 Mon Sep 17 00:00:00 2001 From: Stefan Karpinski Date: Wed, 22 Nov 2017 13:38:11 +0000 Subject: [PATCH 3/4] thisind(s, i): return `ncodeunits(s) + 1` if `i > ncodeunits(s)` --- base/strings/basic.jl | 20 +++++++++++++------- base/strings/string.jl | 4 ++-- base/strings/types.jl | 8 ++++---- test/strings/basic.jl | 5 +++-- 4 files changed, 22 insertions(+), 15 deletions(-) diff --git a/base/strings/basic.jl b/base/strings/basic.jl index 45271839f868a..734f1cc6f9041 100644 --- a/base/strings/basic.jl +++ b/base/strings/basic.jl @@ -237,11 +237,11 @@ end ## Generic indexing functions ## """ - thisind(str::AbstractString, i::Integer) + thisind(s::AbstractString, i::Integer) -Get the largest valid string index at or before `i`. -Returns `0` if there is no valid string index at or before `i`. -Returns `endof(str)` if `i≥endof(str)`. +If `i` is the index into a character in `s` then `thisind` returns the index of the +start of that character. If `i < start(s)` then it returns `start(s) - 1`. +If `i > ncodeunits(s)` then it returns `ncodeunits(s) + 1`. 
# Examples ```jldoctest @@ -257,15 +257,21 @@ julia> thisind("αβγdef", 3) julia> thisind("αβγdef", 4) 3 -julia> thisind("αβγdef", 20) +julia> thisind("αβγdef", 9) 9 + +julia> thisind("αβγdef", 10) +10 + +julia> thisind("αβγdef", 20) +10 """ function thisind(s::AbstractString, i::Integer) j = Int(i) isvalid(s, j) && return j j < start(s) && return 0 - e = endof(s) - j >= endof(s) && return e + n = ncodeunits(s) + j > n && return n + 1 prevind(s, j) end diff --git a/base/strings/string.jl b/base/strings/string.jl index e81ae90c7bd10..1c8cae4501b18 100644 --- a/base/strings/string.jl +++ b/base/strings/string.jl @@ -123,8 +123,8 @@ end function thisind(s::String, i::Integer) j = Int(i) j < 1 && return 0 - e = endof(s) - j >= e && return e + n = ncodeunits(s) + j > n && return n + 1 @inbounds while j > 0 && is_valid_continuation(codeunit(s,j)) j -= 1 end diff --git a/base/strings/types.jl b/base/strings/types.jl index 9e45b09f19311..e4474e5cb7edc 100644 --- a/base/strings/types.jl +++ b/base/strings/types.jl @@ -88,16 +88,16 @@ end function thisind(s::SubString{String}, i::Integer) j = Int(i) - j < 1 && return 0 - e = endof(s) - j >= e && return e + j < start(s) && return 0 + n = ncodeunits(s) + j > n && return n + 1 offset = s.offset str = s.string j += offset @inbounds while j > offset && is_valid_continuation(codeunit(str, j)) j -= 1 end - j-offset + j - offset end nextind(s::SubString, i::Integer) = nextind(s.string, i+s.offset)-s.offset diff --git a/test/strings/basic.jl b/test/strings/basic.jl index 423d5519d06e4..384da5d8a70f3 100644 --- a/test/strings/basic.jl +++ b/test/strings/basic.jl @@ -580,13 +580,14 @@ end @test thisind(s, 6) == 6 @test thisind(s, 15) == 15 @test thisind(s, 16) == 15 - @test thisind(s, 30) == 15 + @test thisind(s, 17) == 17 + @test thisind(s, 30) == 17 end end let strs = Any["", s"", SubString("123", 2, 1), SubString(s"123", 2, 1)] for s in strs, i in -2:2 - @test thisind(s, i) == 0 + @test thisind(s, i) == (i > 0) end end end From 
5167f17355180047ef27eccfea1d5f0e635843e5 Mon Sep 17 00:00:00 2001 From: Stefan Karpinski Date: Wed, 22 Nov 2017 15:39:06 +0000 Subject: [PATCH 4/4] remove `RevString`; efficient generic `reverseind` These seem unrelated, but they're actually linked: * If you reverse generic strings by wrapping them in `RevString` then this generic `reverseind` is incorrect. * In order to have a correct generic `reverseind` one needs to assume that `reverse(s)` returns a string of the same type and encoding as `s` with code points in reverse order; one also needs to assume that the code units encoding each character remain the same when reversed. This is a valid assumption for UTF-8, UTF-16 and (trivially) UTF-32. Reverse string search functions are pretty messed up by this and I've fixed them well enough to work but they may be quite inefficient for long strings now. I'm not going to spend too much time on this since there's other work going on to generalize and unify searching APIs. Close #22611 Close #24613 See also: #10593 #23612 #24103 --- NEWS.md | 11 +++++ base/exports.jl | 1 - base/precompile.jl | 3 -- base/repl/REPLCompletions.jl | 4 +- base/shell.jl | 2 +- base/strings/search.jl | 52 +++++++++++------------ base/strings/string.jl | 40 ------------------ base/strings/strings.jl | 2 +- base/strings/{types.jl => substring.jl} | 55 ++++++++++--------------- base/strings/util.jl | 15 ++++--- doc/src/stdlib/strings.md | 2 +- stdlib/Test/src/Test.jl | 3 ++ test/replcompletions.jl | 2 +- test/strings/types.jl | 34 +++++++-------- test/unicode/utf8.jl | 18 ++++---- 15 files changed, 97 insertions(+), 147 deletions(-) rename base/strings/{types.jl => substring.jl} (80%) diff --git a/NEWS.md b/NEWS.md index 8ad8c403bf1d4..b2df3ca485fb5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -308,6 +308,12 @@ This section lists changes that do not have deprecation warnings. `AbstractArray` types that specialized broadcasting using the old internal API will need to switch to the new API. 
([#20740]) + * The `RevString` type has been removed from the language; `reverse(::String)` returns + a `String` with code points (or fragments thereof) in reverse order. In general, + `reverse(s)` should return a string of the same type and encoding as `s` with code + points in reverse order; any string type that overrides `reverse` to return a different + type of string must also override `reverseind` to compute reversed indices correctly. + Library improvements -------------------- @@ -413,6 +419,11 @@ Library improvements The generic definition is constant time but calls `endof(s)` which may be inefficient. Therefore custom string types may want to define direct `ncodeunits` methods. + * `reverseind(s::AbstractString, i::Integer)` now has an efficient generic fallback, so + custom string types do not need to provide their own efficient definitions. The generic + definition relies on `ncodeunits` however, so for optimal performance you may need to + define a custom method for that function. 
+ Compiler/Runtime improvements ----------------------------- diff --git a/base/exports.jl b/base/exports.jl index 019babfb65f98..9756109b9263f 100644 --- a/base/exports.jl +++ b/base/exports.jl @@ -88,7 +88,6 @@ export Rational, Regex, RegexMatch, - RevString, RoundFromZero, RoundDown, RoundingMode, diff --git a/base/precompile.jl b/base/precompile.jl index c7aefb8991d6b..d191f3dac0a73 100644 --- a/base/precompile.jl +++ b/base/precompile.jl @@ -578,9 +578,6 @@ precompile(Tuple{typeof(Base.LineEdit.complete_line), Base.LineEdit.PromptState, precompile(Tuple{typeof(Base.LineEdit.input_string_newlines_aftercursor), Base.LineEdit.PromptState}) precompile(Tuple{typeof(Base.LineEdit.complete_line), Base.REPL.REPLCompletionProvider, Base.LineEdit.PromptState}) precompile(Tuple{getfield(Base, Symbol("#kw##parse")), Array{Any, 1}, typeof(Base.parse), String}) -precompile(Tuple{typeof(Base.isvalid), Base.RevString{String}, Int64}) -precompile(Tuple{typeof(Base.nextind), Base.RevString{String}, Int64}) -precompile(Tuple{typeof(Base.search), Base.RevString{String}, Array{Char, 1}, Int64}) precompile(Tuple{typeof(Base.rsearch), String, Array{Char, 1}, Int64}) precompile(Tuple{getfield(Base.REPLCompletions, Symbol("#kw##find_start_brace")), Array{Any, 1}, typeof(Base.REPLCompletions.find_start_brace), String}) precompile(Tuple{typeof(Core.Inference.isbits), Tuple{Void, Void, Void}}) diff --git a/base/repl/REPLCompletions.jl b/base/repl/REPLCompletions.jl index 2040476af8947..3e5056d613f26 100644 --- a/base/repl/REPLCompletions.jl +++ b/base/repl/REPLCompletions.jl @@ -225,7 +225,7 @@ end # closed start brace from the end of the string. 
function find_start_brace(s::AbstractString; c_start='(', c_end=')') braces = 0 - r = RevString(s) + r = reverse(s) i = start(r) in_single_quotes = false in_double_quotes = false @@ -259,7 +259,7 @@ function find_start_brace(s::AbstractString; c_start='(', c_end=')') braces == 1 && break end braces != 1 && return 0:-1, -1 - method_name_end = reverseind(r, i) + method_name_end = reverseind(s, i) startind = nextind(s, rsearch(s, non_identifier_chars, method_name_end)) return (startind:endof(s), method_name_end) end diff --git a/base/shell.jl b/base/shell.jl index b8923ced43d04..72ffd23d9a944 100644 --- a/base/shell.jl +++ b/base/shell.jl @@ -14,7 +14,7 @@ function shell_parse(str::AbstractString, interpolate::Bool=true; special::AbstractString="") s = lstrip(str) # strips the end but respects the space when the string ends with "\\ " - r = RevString(s) + r = reverse(s) i = start(r) c_old = nothing while !done(r,i) diff --git a/base/strings/search.jl b/base/strings/search.jl index 23f813ea28b26..43e880a26b9e5 100644 --- a/base/strings/search.jl +++ b/base/strings/search.jl @@ -194,12 +194,6 @@ end search(s::AbstractString, t::AbstractString, i::Integer=start(s)) = _search(s, t, i) search(s::ByteArray, t::ByteArray, i::Integer=start(s)) = _search(s, t, i) -function rsearch(s::AbstractString, c::Chars) - j = search(RevString(s), c) - j == 0 && return 0 - endof(s)-j+1 -end - """ rsearch(s::AbstractString, chars::Chars, [start::Integer]) @@ -212,44 +206,50 @@ julia> rsearch("aaabbb","b") 6:6 ``` """ -function rsearch(s::AbstractString, c::Chars, i::Integer) - e = endof(s) - j = search(RevString(s), c, e-i+1) - j == 0 && return 0 - e-j+1 +function rsearch(s::AbstractString, c::Chars, i::Integer=start(s)) + if i < 1 + return i == 0 ? 0 : throw(BoundsError(s, i)) + end + n = ncodeunits(s) + if i > n + return i == n+1 ? 
0 : throw(BoundsError(s, i)) + end + # r[reverseind(r,i)] == reverse(r)[i] == s[i] + # s[reverseind(s,j)] == reverse(s)[j] == r[j] + r = reverse(s) + j = search(r, c, reverseind(r, i)) + j == 0 ? 0 : reverseind(s, j) end function _rsearchindex(s, t, i) if isempty(t) - return 1 <= i <= nextind(s,endof(s)) ? i : + return 1 <= i <= nextind(s, endof(s)) ? i : throw(BoundsError(s, i)) end - t = RevString(t) - rs = RevString(s) + t = reverse(t) + rs = reverse(s) l = endof(s) - t1, j2 = next(t,start(t)) + t1, j2 = next(t, start(t)) while true - i = rsearch(s,t1,i) - if i == 0 return 0 end - c, ii = next(rs,l-i+1) + i = rsearch(s, t1, i) + i == 0 && return 0 + c, ii = next(rs, reverseind(rs, i)) j = j2; k = ii matched = true - while !done(t,j) - if done(rs,k) + while !done(t, j) + if done(rs, k) matched = false break end - c, k = next(rs,k) - d, j = next(t,j) + c, k = next(rs, k) + d, j = next(t, j) if c != d matched = false break end end - if matched - return nextind(s,l-k+1) - end - i = l-ii+1 + matched && return nextind(s, reverseind(s, k)) + i = reverseind(s, ii) end end diff --git a/base/strings/string.jl b/base/strings/string.jl index 1c8cae4501b18..e66f876a5f77d 100644 --- a/base/strings/string.jl +++ b/base/strings/string.jl @@ -295,14 +295,6 @@ function first_utf8_byte(ch::Char) return b end -function reverseind(s::String, i::Integer) - j = sizeof(s) + 1 - i - @inbounds while is_valid_continuation(codeunit(s, j)) - j -= 1 - end - return j -end - ## overload methods for efficiency ## isvalid(s::String, i::Integer) = @@ -477,38 +469,6 @@ function string(a::Union{String,Char}...) 
return out end -function reverse(s::String) - dat = Vector{UInt8}(s) - n = length(dat) - n <= 1 && return s - buf = StringVector(n) - out = n - pos = 1 - @inbounds while out > 0 - ch = dat[pos] - if ch > 0xdf - if ch < 0xf0 - (out -= 3) < 0 && throw(UnicodeError(UTF_ERR_SHORT, pos, ch)) - buf[out + 1], buf[out + 2], buf[out + 3] = ch, dat[pos + 1], dat[pos + 2] - pos += 3 - else - (out -= 4) < 0 && throw(UnicodeError(UTF_ERR_SHORT, pos, ch)) - buf[out+1], buf[out+2], buf[out+3], buf[out+4] = ch, dat[pos+1], dat[pos+2], dat[pos+3] - pos += 4 - end - elseif ch > 0x7f - (out -= 2) < 0 && throw(UnicodeError(UTF_ERR_SHORT, pos, ch)) - buf[out + 1], buf[out + 2] = ch, dat[pos + 1] - pos += 2 - else - buf[out] = ch - out -= 1 - pos += 1 - end - end - String(buf) -end - function repeat(s::String, r::Integer) r < 0 && throw(ArgumentError("can't repeat a string $r times")) n = sizeof(s) diff --git a/base/strings/strings.jl b/base/strings/strings.jl index 0acad00f79ca7..1e442a09e406f 100644 --- a/base/strings/strings.jl +++ b/base/strings/strings.jl @@ -1,7 +1,7 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license include("strings/errors.jl") -include("strings/types.jl") +include("strings/substring.jl") include("strings/basic.jl") include("strings/search.jl") include("strings/util.jl") diff --git a/base/strings/types.jl b/base/strings/substring.jl similarity index 80% rename from base/strings/types.jl rename to base/strings/substring.jl index e4474e5cb7edc..d1bf33e4123fb 100644 --- a/base/strings/types.jl +++ b/base/strings/substring.jl @@ -1,9 +1,5 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license -# SubString and RevString types - -## substrings reference original strings ## - """ SubString(s::AbstractString, i::Integer, j::Integer=endof(s)) SubString(s::AbstractString, r::UnitRange{<:Integer}) @@ -51,6 +47,9 @@ end SubString(s::AbstractString) = SubString(s, 1, endof(s)) SubString{T}(s::T) where {T<:AbstractString} = SubString{T}(s, 1, endof(s)) +convert(::Type{SubString{S}}, s::AbstractString) where {S<:AbstractString} = + SubString(convert(S, s)) + String(p::SubString{String}) = unsafe_string(pointer(p.string, p.offset+1), nextind(p, p.endof)-1) @@ -123,32 +122,18 @@ function unsafe_convert(::Type{Ptr{R}}, s::SubString{String}) where R<:Union{Int convert(Ptr{R}, pointer(s.string)) + s.offset end -## reversed strings without data movement ## - -struct RevString{T<:AbstractString} <: AbstractString - string::T -end - -endof(s::RevString) = endof(s.string) -length(s::RevString) = length(s.string) -sizeof(s::RevString) = sizeof(s.string) - -function next(s::RevString, i::Int) - n = endof(s); j = n-i+1 - (s.string[j], n-prevind(s.string,j)+1) -end - """ reverse(s::AbstractString) -> AbstractString -Reverses a string. - -Technically, this function reverses the codepoints in a string, and its +Reverses a string. Technically, this function reverses the codepoints in a string and its main utility is for reversed-order string processing, especially for reversed -regular-expression searches. See also [`reverseind`](@ref) to convert indices -in `s` to indices in `reverse(s)` and vice-versa, and [`graphemes`](@ref) -to operate on user-visible "characters" (graphemes) rather than codepoints. -See also [`Iterators.reverse`](@ref) for reverse-order iteration without making a copy. +regular-expression searches. See also [`reverseind`](@ref) to convert indices in `s` to +indices in `reverse(s)` and vice-versa, and [`graphemes`](@ref) to operate on user-visible +"characters" (graphemes) rather than codepoints. 
See also [`Iterators.reverse`](@ref) for +reverse-order iteration without making a copy. Custom string types must implement the +`reverse` function themselves and should typically return a string with the same type +and encoding. If they return a string with a different encoding, they must also override +`reverseind` for that string type to satisfy `s[reverseind(s,i)] == reverse(s)[i]`. # Examples ```jldoctest @@ -162,10 +147,15 @@ julia> join(reverse(collect(graphemes("ax̂e")))) # reverses graphemes "ex̂a" ``` """ -reverse(s::AbstractString) = RevString(s) -reverse(s::RevString) = s.string - -## reverse an index i so that reverse(s)[i] == s[reverseind(s,i)] +function reverse(s::Union{String,SubString{String}})::String + sprint() do io + i, j = start(s), endof(s) + while i ≤ j + c, j = s[j], prevind(s, j) + write(io, c) + end + end +end """ reverseind(v, i) @@ -185,10 +175,7 @@ julia> for i in 1:length(r) Julia ``` """ -reverseind(s::AbstractString, i) = chr2ind(s, length(s) + 1 - ind2chr(reverse(s), i)) -reverseind(s::RevString, i::Integer) = endof(s) - i + 1 -reverseind(s::SubString{String}, i::Integer) = - reverseind(s.string, nextind(s.string, endof(s.string))-s.offset-s.endof+i-1) - s.offset +reverseind(s::AbstractString, i::Integer) = thisind(s, ncodeunits(s)-i+1) """ repeat(s::AbstractString, r::Integer) diff --git a/base/strings/util.jl b/base/strings/util.jl index ace3d93bcb24f..db230a16da0c6 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -190,16 +190,15 @@ julia> rstrip(a) ``` """ function rstrip(s::AbstractString, chars::Chars=_default_delims) - r = RevString(s) - i = start(r) - while !done(r,i) - c, j = next(r,i) - if !(c in chars) - return SubString(s, 1, endof(s)-i+1) - end + a = start(s) + i = endof(s) + while a ≤ i + c = s[i] + j = prevind(s, i) + c in chars || return SubString(s, 1:i) i = j end - SubString(s, 1, 0) + SubString(s, a, a-1) end """ diff --git a/doc/src/stdlib/strings.md b/doc/src/stdlib/strings.md index 
36ed901262f92..ec561b8a810a6 100644 --- a/doc/src/stdlib/strings.md +++ b/doc/src/stdlib/strings.md @@ -39,7 +39,7 @@ Base.rsearch Base.searchindex Base.rsearchindex Base.contains(::AbstractString, ::AbstractString) -Base.reverse(::AbstractString) +Base.reverse(::Union{String,SubString{String}}) Base.replace Base.split Base.rsplit diff --git a/stdlib/Test/src/Test.jl b/stdlib/Test/src/Test.jl index ac97ef36a0e23..2bd86ddaefa00 100644 --- a/stdlib/Test/src/Test.jl +++ b/stdlib/Test/src/Test.jl @@ -1387,6 +1387,9 @@ struct GenericString <: AbstractString end Base.endof(s::GenericString) = endof(s.string) Base.next(s::GenericString, i::Int) = next(s.string, i) +Base.reverse(s::GenericString) = GenericString(reverse(s.string)) +Base.reverse(s::SubString{GenericString}) = + GenericString(typeof(s.string)(reverse(String(s)))) """ The `GenericSet` can be used to test generic set APIs that program to diff --git a/test/replcompletions.jl b/test/replcompletions.jl index a45fb05c7d28e..9698b1622fdf9 100644 --- a/test/replcompletions.jl +++ b/test/replcompletions.jl @@ -3,7 +3,7 @@ using Base.REPLCompletions let ex = quote - module CompletionFoo + module CompletionFoo mutable struct Test_y yy end diff --git a/test/strings/types.jl b/test/strings/types.jl index a9ce276c02961..12dd75a1bd421 100644 --- a/test/strings/types.jl +++ b/test/strings/types.jl @@ -1,6 +1,6 @@ # This file is a part of Julia. 
License is MIT: https://julialang.org/license -## SubString, RevString and Cstring tests ## +## SubString and Cstring tests ## ## SubString tests ## u8str = "∀ ε > 0, ∃ δ > 0: |x-y| < δ ⇒ |f(x)-f(y)| < ε" @@ -207,31 +207,13 @@ let s = "|η(α)-ϕ(κ)| < ε" @test length(SubString(s,4,11))==length(s[4:11]) end -## Reverse strings ## - -let rs = RevString("foobar") - @test length(rs) == 6 - @test sizeof(rs) == 6 - @test isascii(rs) -end - -# issue #4586 -@test rsplit(RevString("ailuj"),'l') == ["ju","ia"] -@test parse(Float64,RevString("64")) === 46.0 - -# reverseind -for T in (String, GenericString) +@testset "reverseind" for T in (String, SubString, GenericString) for prefix in ("", "abcd", "\U0001d6a4\U0001d4c1", "\U0001d6a4\U0001d4c1c", " \U0001d6a4\U0001d4c1") for suffix in ("", "abcde", "\U0001d4c1β\U0001d6a4", "\U0001d4c1β\U0001d6a4c", " \U0001d4c1β\U0001d6a4") for c in ('X', 'δ', '\U0001d6a5') s = convert(T, string(prefix, c, suffix)) r = reverse(s) ri = search(r, c) - @test r == RevString(s) - @test c == s[reverseind(s, ri)] == r[ri] - s = RevString(s) - r = reverse(s) - ri = search(r, c) @test c == s[reverseind(s, ri)] == r[ri] s = convert(T, string(prefix, prefix, c, suffix, suffix)) pre = convert(T, prefix) @@ -244,6 +226,18 @@ for T in (String, GenericString) end end +@testset "reverseind of empty strings" begin + for s in ("", + SubString("", 1, 0), + SubString("ab", 1, 0), + SubString("ab", 2, 1), + SubString("ab", 3, 2), + GenericString("")) + @test reverseind(s, 0) == 1 + @test reverseind(s, 1) == 0 + end +end + ## Cstring tests ## # issue #13974: comparison against pointers diff --git a/test/unicode/utf8.jl b/test/unicode/utf8.jl index bdb9664fc26fb..a9db6316d2fa9 100644 --- a/test/unicode/utf8.jl +++ b/test/unicode/utf8.jl @@ -27,16 +27,16 @@ end @test reverse("a") == "a" @test reverse("abc") == "cba" @test reverse("xyz\uff\u800\uffff\U10ffff") == "\U10ffff\uffff\u800\uffzyx" - for str in [ - b"xyz\xc1", - b"xyz\xd0", - b"xyz\xe0", - b"xyz\xed\x80", - 
b"xyz\xf0", - b"xyz\xf0\x80", - b"xyz\xf0\x80\x80" + for (s, r) in [ + b"xyz\xc1" => b"\xc1zyx", + b"xyz\xd0" => b"\xd0zyx", + b"xyz\xe0" => b"\xe0zyx", + b"xyz\xed\x80" => b"\xed\x80zyx", + b"xyz\xf0" => b"\xf0zyx", + b"xyz\xf0\x80" => b"\xf0\x80zyx", + b"xyz\xf0\x80\x80" => b"\xf0\x80\x80zyx", ] - @test_throws UnicodeError reverse(String(str)) + @test_broken reverse(String(s)) == String(r) end end