diff --git a/NEWS.md b/NEWS.md index 0c12a86faac3d..087e30ae5ef1f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -290,6 +290,12 @@ This section lists changes that do not have deprecation warnings. Its return value has been removed. Use the `process_running` function to determine if a process has already exited. + * The `RevString` type has been removed from the language; `reverse(::String)` returns + a `String` with code points (or fragments thereof) in reverse order. In general, + `reverse(s)` should return a string of the same type and encoding as `s` with code + points in reverse order; any string type that does so much override the `reverseind` + function to correctly compute reversed indices into the string type it returns. + Library improvements -------------------- @@ -392,6 +398,11 @@ Library improvements The generic definition is constant time but calls `endof(s)` which may be inefficient. Therefore custom string types may want to define direct `ncodeunits` methods. + * `reverseind(s::AbstractString, i::Integer)` now has an efficient generic fallback, so + custom string types do not need to provide their own efficient defintions. The generic + definition relies on `ncodeunits` however, so for optimal performance you may need to + define a custom method for that function. + Compiler/Runtime improvements ----------------------------- diff --git a/base/exports.jl b/base/exports.jl index 2eec4350fcaf2..1497f597740f9 100644 --- a/base/exports.jl +++ b/base/exports.jl @@ -88,7 +88,6 @@ export Rational, Regex, RegexMatch, - RevString, RoundFromZero, RoundDown, RoundingMode, diff --git a/base/precompile.jl b/base/precompile.jl index b047275fbd82f..2e290813b5368 100644 --- a/base/precompile.jl +++ b/base/precompile.jl @@ -581,9 +581,6 @@ precompile(Tuple{typeof(Base.LineEdit.complete_line), Base.LineEdit.PromptState, precompile(Tuple{typeof(Base.LineEdit.input_string_newlines_aftercursor), Base.LineEdit.PromptState}) precompile(Tuple{typeof(Base.LineEdit.complete_line), Base.REPL.REPLCompletionProvider, Base.LineEdit.PromptState}) precompile(Tuple{getfield(Base, Symbol("#kw##parse")), Array{Any, 1}, typeof(Base.parse), String}) -precompile(Tuple{typeof(Base.isvalid), Base.RevString{String}, Int64}) -precompile(Tuple{typeof(Base.nextind), Base.RevString{String}, Int64}) -precompile(Tuple{typeof(Base.search), Base.RevString{String}, Array{Char, 1}, Int64}) precompile(Tuple{typeof(Base.rsearch), String, Array{Char, 1}, Int64}) precompile(Tuple{getfield(Base.REPLCompletions, Symbol("#kw##find_start_brace")), Array{Any, 1}, typeof(Base.REPLCompletions.find_start_brace), String}) precompile(Tuple{typeof(Core.Inference.isbits), Tuple{Void, Void, Void}}) diff --git a/base/repl/REPLCompletions.jl b/base/repl/REPLCompletions.jl index 2040476af8947..3e5056d613f26 100644 --- a/base/repl/REPLCompletions.jl +++ b/base/repl/REPLCompletions.jl @@ -225,7 +225,7 @@ end # closed start brace from the end of the string. function find_start_brace(s::AbstractString; c_start='(', c_end=')') braces = 0 - r = RevString(s) + r = reverse(s) i = start(r) in_single_quotes = false in_double_quotes = false @@ -259,7 +259,7 @@ function find_start_brace(s::AbstractString; c_start='(', c_end=')') braces == 1 && break end braces != 1 && return 0:-1, -1 - method_name_end = reverseind(r, i) + method_name_end = reverseind(s, i) startind = nextind(s, rsearch(s, non_identifier_chars, method_name_end)) return (startind:endof(s), method_name_end) end diff --git a/base/shell.jl b/base/shell.jl index b8923ced43d04..72ffd23d9a944 100644 --- a/base/shell.jl +++ b/base/shell.jl @@ -14,7 +14,7 @@ function shell_parse(str::AbstractString, interpolate::Bool=true; special::AbstractString="") s = lstrip(str) # strips the end but respects the space when the string ends with "\\ " - r = RevString(s) + r = reverse(s) i = start(r) c_old = nothing while !done(r,i) diff --git a/base/strings/search.jl b/base/strings/search.jl index 23f813ea28b26..43e880a26b9e5 100644 --- a/base/strings/search.jl +++ b/base/strings/search.jl @@ -194,12 +194,6 @@ end search(s::AbstractString, t::AbstractString, i::Integer=start(s)) = _search(s, t, i) search(s::ByteArray, t::ByteArray, i::Integer=start(s)) = _search(s, t, i) -function rsearch(s::AbstractString, c::Chars) - j = search(RevString(s), c) - j == 0 && return 0 - endof(s)-j+1 -end - """ rsearch(s::AbstractString, chars::Chars, [start::Integer]) @@ -212,44 +206,50 @@ julia> rsearch("aaabbb","b") 6:6 ``` """ -function rsearch(s::AbstractString, c::Chars, i::Integer) - e = endof(s) - j = search(RevString(s), c, e-i+1) - j == 0 && return 0 - e-j+1 +function rsearch(s::AbstractString, c::Chars, i::Integer=start(s)) + if i < 1 + return i == 0 ? 0 : throw(BoundsError(s, i)) + end + n = ncodeunits(s) + if i > n + return i == n+1 ? 0 : throw(BoundsError(s, i)) + end + # r[reverseind(r,i)] == reverse(r)[i] == s[i] + # s[reverseind(s,j)] == reverse(s)[j] == r[j] + r = reverse(s) + j = search(r, c, reverseind(r, i)) + j == 0 ? 0 : reverseind(s, j) end function _rsearchindex(s, t, i) if isempty(t) - return 1 <= i <= nextind(s,endof(s)) ? i : + return 1 <= i <= nextind(s, endof(s)) ? i : throw(BoundsError(s, i)) end - t = RevString(t) - rs = RevString(s) + t = reverse(t) + rs = reverse(s) l = endof(s) - t1, j2 = next(t,start(t)) + t1, j2 = next(t, start(t)) while true - i = rsearch(s,t1,i) - if i == 0 return 0 end - c, ii = next(rs,l-i+1) + i = rsearch(s, t1, i) + i == 0 && return 0 + c, ii = next(rs, reverseind(rs, i)) j = j2; k = ii matched = true - while !done(t,j) - if done(rs,k) + while !done(t, j) + if done(rs, k) matched = false break end - c, k = next(rs,k) - d, j = next(t,j) + c, k = next(rs, k) + d, j = next(t, j) if c != d matched = false break end end - if matched - return nextind(s,l-k+1) - end - i = l-ii+1 + matched && return nextind(s, reverseind(s, k)) + i = reverseind(s, ii) end end diff --git a/base/strings/string.jl b/base/strings/string.jl index a7b152488faea..86c3cd82ca566 100644 --- a/base/strings/string.jl +++ b/base/strings/string.jl @@ -295,14 +295,6 @@ function first_utf8_byte(ch::Char) return b end -function reverseind(s::String, i::Integer) - j = sizeof(s) + 1 - i - @inbounds while is_valid_continuation(codeunit(s, j)) - j -= 1 - end - return j -end - ## overload methods for efficiency ## isvalid(s::String, i::Integer) = diff --git a/base/strings/strings.jl b/base/strings/strings.jl index 0acad00f79ca7..1e442a09e406f 100644 --- a/base/strings/strings.jl +++ b/base/strings/strings.jl @@ -1,7 +1,7 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license include("strings/errors.jl") -include("strings/types.jl") +include("strings/substring.jl") include("strings/basic.jl") include("strings/search.jl") include("strings/util.jl") diff --git a/base/strings/types.jl b/base/strings/substring.jl similarity index 78% rename from base/strings/types.jl rename to base/strings/substring.jl index e4474e5cb7edc..38a2a57e333df 100644 --- a/base/strings/types.jl +++ b/base/strings/substring.jl @@ -1,9 +1,5 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license -# SubString and RevString types - -## substrings reference original strings ## - """ SubString(s::AbstractString, i::Integer, j::Integer=endof(s)) SubString(s::AbstractString, r::UnitRange{<:Integer}) @@ -123,32 +119,13 @@ function unsafe_convert(::Type{Ptr{R}}, s::SubString{String}) where R<:Union{Int convert(Ptr{R}, pointer(s.string)) + s.offset end -## reversed strings without data movement ## - -struct RevString{T<:AbstractString} <: AbstractString - string::T -end - -endof(s::RevString) = endof(s.string) -length(s::RevString) = length(s.string) -sizeof(s::RevString) = sizeof(s.string) - -function next(s::RevString, i::Int) - n = endof(s); j = n-i+1 - (s.string[j], n-prevind(s.string,j)+1) -end - """ - reverse(s::AbstractString) -> AbstractString - -Reverses a string. + reverse(s::AbstractString) -> String -Technically, this function reverses the codepoints in a string, and its -main utility is for reversed-order string processing, especially for reversed -regular-expression searches. See also [`reverseind`](@ref) to convert indices -in `s` to indices in `reverse(s)` and vice-versa, and [`graphemes`](@ref) -to operate on user-visible "characters" (graphemes) rather than codepoints. -See also [`Iterators.reverse`](@ref) for reverse-order iteration without making a copy. +Reverses a string. Technically, this function reverses the codepoints in a string. See also +[`reverseind`](@ref) to convert indices in `s` to indices in `reverse(s)` and vice-versa, and +[`graphemes`](@ref) to operate on user-visible "characters" (graphemes) rather than codepoints. See +also [`Iterators.reverse`](@ref) for reverse-order iteration without making a copy. # Examples ```jldoctest @@ -162,10 +139,15 @@ julia> join(reverse(collect(graphemes("ax̂e")))) # reverses graphemes "ex̂a" ``` """ -reverse(s::AbstractString) = RevString(s) -reverse(s::RevString) = s.string - -## reverse an index i so that reverse(s)[i] == s[reverseind(s,i)] +function reverse(s::AbstractString)::String + sprint() do io + i, j = start(s), endof(s) + while i ≤ j + c, j = s[j], prevind(s, j) + write(io, c) + end + end +end """ reverseind(v, i) @@ -185,10 +167,7 @@ julia> for i in 1:length(r) Julia ``` """ -reverseind(s::AbstractString, i) = chr2ind(s, length(s) + 1 - ind2chr(reverse(s), i)) -reverseind(s::RevString, i::Integer) = endof(s) - i + 1 -reverseind(s::SubString{String}, i::Integer) = - reverseind(s.string, nextind(s.string, endof(s.string))-s.offset-s.endof+i-1) - s.offset +reverseind(s::AbstractString, i::Integer) = thisind(s, ncodeunits(s)-i+1) """ repeat(s::AbstractString, r::Integer) diff --git a/base/strings/util.jl b/base/strings/util.jl index 0e556aa822ffe..dae171b36fef2 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -190,16 +190,15 @@ julia> rstrip(a) ``` """ function rstrip(s::AbstractString, chars::Chars=_default_delims) - r = RevString(s) - i = start(r) - while !done(r,i) - c, j = next(r,i) - if !(c in chars) - return SubString(s, 1, endof(s)-i+1) - end + a = start(s) + i = endof(s) + while a ≤ i + c = s[i] + j = prevind(s, i) + c in chars || return SubString(s, 1:i) i = j end - SubString(s, 1, 0) + SubString(s, a, a-1) end """ diff --git a/test/replcompletions.jl b/test/replcompletions.jl index a45fb05c7d28e..9698b1622fdf9 100644 --- a/test/replcompletions.jl +++ b/test/replcompletions.jl @@ -3,7 +3,7 @@ using Base.REPLCompletions let ex = quote - module CompletionFoo + module CompletionFoo mutable struct Test_y yy end diff --git a/test/strings/types.jl b/test/strings/types.jl index a9ce276c02961..12dd75a1bd421 100644 --- a/test/strings/types.jl +++ b/test/strings/types.jl @@ -1,6 +1,6 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license -## SubString, RevString and Cstring tests ## +## SubString and Cstring tests ## ## SubString tests ## u8str = "∀ ε > 0, ∃ δ > 0: |x-y| < δ ⇒ |f(x)-f(y)| < ε" @@ -207,31 +207,13 @@ let s = "|η(α)-ϕ(κ)| < ε" @test length(SubString(s,4,11))==length(s[4:11]) end -## Reverse strings ## - -let rs = RevString("foobar") - @test length(rs) == 6 - @test sizeof(rs) == 6 - @test isascii(rs) -end - -# issue #4586 -@test rsplit(RevString("ailuj"),'l') == ["ju","ia"] -@test parse(Float64,RevString("64")) === 46.0 - -# reverseind -for T in (String, GenericString) +@testset "reverseind" for T in (String, SubString, GenericString) for prefix in ("", "abcd", "\U0001d6a4\U0001d4c1", "\U0001d6a4\U0001d4c1c", " \U0001d6a4\U0001d4c1") for suffix in ("", "abcde", "\U0001d4c1β\U0001d6a4", "\U0001d4c1β\U0001d6a4c", " \U0001d4c1β\U0001d6a4") for c in ('X', 'δ', '\U0001d6a5') s = convert(T, string(prefix, c, suffix)) r = reverse(s) ri = search(r, c) - @test r == RevString(s) - @test c == s[reverseind(s, ri)] == r[ri] - s = RevString(s) - r = reverse(s) - ri = search(r, c) @test c == s[reverseind(s, ri)] == r[ri] s = convert(T, string(prefix, prefix, c, suffix, suffix)) pre = convert(T, prefix) @@ -244,6 +226,18 @@ for T in (String, GenericString) end end +@testset "reverseind of empty strings" begin + for s in ("", + SubString("", 1, 0), + SubString("ab", 1, 0), + SubString("ab", 2, 1), + SubString("ab", 3, 2), + GenericString("")) + @test reverseind(s, 0) == 1 + @test reverseind(s, 1) == 0 + end +end + ## Cstring tests ## # issue #13974: comparison against pointers