diff --git a/NEWS.md b/NEWS.md index a29165e0fee9b..792016be2b39c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -640,6 +640,9 @@ Breaking changes This section lists changes that do not have deprecation warnings. + * The constructor of `SubString` now checks if the requsted view range + is defined by valid indices in the parent `AbstractString` ([#22511]). + * `readline`, `readlines` and `eachline` return lines without line endings by default. You *must* use `readline(s, chomp=false)`, etc. to get the old behavior where returned lines include trailing end-of-line character(s) ([#19944]). diff --git a/base/regex.jl b/base/regex.jl index 62dc2562340cd..41637ecc993b5 100644 --- a/base/regex.jl +++ b/base/regex.jl @@ -212,9 +212,10 @@ function match(re::Regex, str::Union{SubString{String}, String}, idx::Integer, a end ovec = re.ovec n = div(length(ovec),2) - 1 - mat = SubString(str, ovec[1]+1, ovec[2]) - cap = Union{Void,SubString{String}}[ - ovec[2i+1] == PCRE.UNSET ? nothing : SubString(str, ovec[2i+1]+1, ovec[2i+2]) for i=1:n ] + mat = SubString(str, ovec[1]+1, prevind(str, ovec[2]+1)) + cap = Union{Void,SubString{String}}[ovec[2i+1] == PCRE.UNSET ? nothing : + SubString(str, ovec[2i+1]+1, + prevind(str, ovec[2i+2]+1)) for i=1:n] off = Int[ ovec[2i+1]+1 for i=1:n ] RegexMatch(mat, cap, ovec[1]+1, off, re) end diff --git a/base/shell.jl b/base/shell.jl index 841865790b9c9..0e62d8501744c 100644 --- a/base/shell.jl +++ b/base/shell.jl @@ -53,7 +53,7 @@ function shell_parse(str::AbstractString, interpolate::Bool=true; while !done(s,j) c, k = next(s,j) if !in_single_quotes && !in_double_quotes && isspace(c) - update_arg(s[i:j-1]) + update_arg(s[i:prevind(s, j)]) append_arg() j = k while !done(s,j) @@ -65,7 +65,7 @@ function shell_parse(str::AbstractString, interpolate::Bool=true; j = k end elseif interpolate && !in_single_quotes && c == '$' - update_arg(s[i:j-1]); i = k; j = k + update_arg(s[i:prevind(s, j)]); i = k; j = k if done(s,k) error("\$ right before end of command") end @@ -79,24 +79,24 @@ function shell_parse(str::AbstractString, interpolate::Bool=true; else if !in_double_quotes && c == '\'' in_single_quotes = !in_single_quotes - update_arg(s[i:j-1]); i = k + update_arg(s[i:prevind(s, j)]); i = k elseif !in_single_quotes && c == '"' in_double_quotes = !in_double_quotes - update_arg(s[i:j-1]); i = k + update_arg(s[i:prevind(s, j)]); i = k elseif c == '\\' if in_double_quotes if done(s,k) error("unterminated double quote") end if s[k] == '"' || s[k] == '$' || s[k] == '\\' - update_arg(s[i:j-1]); i = k + update_arg(s[i:prevind(s, j)]); i = k c, k = next(s,k) end elseif !in_single_quotes if done(s,k) error("dangling backslash") end - update_arg(s[i:j-1]); i = k + update_arg(s[i:prevind(s, j)]); i = k c, k = next(s,k) end elseif !in_single_quotes && !in_double_quotes && c in special diff --git a/base/strings/types.jl b/base/strings/types.jl index 9e74a83a39d6a..fd1a387970104 100644 --- a/base/strings/types.jl +++ b/base/strings/types.jl @@ -4,32 +4,38 @@ ## substrings reference original strings ## +""" + SubString(s::AbstractString, i::Integer, j::Integer=endof(s)) + SubString(s::AbstractString, r::UnitRange{<:Integer}) + +Like [`getindex`](@ref), but returns a view into the parent string `s` +within range `i:j` or `r` respectively instead of making a copy. +""" struct SubString{T<:AbstractString} <: AbstractString string::T offset::Int endof::Int function SubString{T}(s::T, i::Int, j::Int) where T<:AbstractString - if i > endof(s) || j i - j -= 1 - end - - o = i-1 - new(s, o, max(0, j-o)) - end + i > j && return new(s, i - 1, 0) # always allow i > j as it is consistent with getindex + isvalid(s, i) || throw(BoundsError(s, i)) + isvalid(s, j) || throw(BoundsError(s, j)) + new(s, i-1, j-i+1) end end + SubString(s::T, i::Int, j::Int) where {T<:AbstractString} = SubString{T}(s, i, j) -SubString(s::SubString, i::Int, j::Int) = SubString(s.string, s.offset+i, s.offset+j) -SubString(s::AbstractString, i::Integer, j::Integer) = SubString(s, Int(i), Int(j)) -SubString(s::AbstractString, i::Integer) = SubString(s, i, endof(s)) +SubString(s::AbstractString, i::Integer, j::Integer=endof(s)) = SubString(s, Int(i), Int(j)) +SubString(s::AbstractString, r::UnitRange{<:Integer}) = SubString(s, first(r), last(r)) + +function SubString(s::SubString, i::Int, j::Int) + # always allow i > j as it is consistent with getindex + i > j && return SubString(s.string, s.offset + i, s.offset + j) + i >= 1 || throw(BoundsError(s, i)) + j <= endof(s) || throw(BoundsError(s, j)) + SubString(s.string, s.offset + i, s.offset + j) +end + SubString(s::AbstractString) = SubString(s, 1, endof(s)) SubString{T}(s::T) where {T<:AbstractString} = SubString{T}(s, 1, endof(s)) diff --git a/base/strings/util.jl b/base/strings/util.jl index fd9a055f06352..8fd145ac4cb0c 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -79,7 +79,7 @@ julia> chop(a) "Marc" ``` """ -chop(s::AbstractString) = SubString(s, 1, endof(s)-1) +chop(s::AbstractString) = SubString(s, 1, prevind(s, endof(s))) """ chomp(s::AbstractString) @@ -96,17 +96,17 @@ function chomp(s::AbstractString) i = endof(s) (i < 1 || s[i] != '\n') && (return SubString(s, 1, i)) j = prevind(s,i) - (j < 1 || s[j] != '\r') && (return SubString(s, 1, i-1)) - return SubString(s, 1, j-1) + (j < 1 || s[j] != '\r') && (return SubString(s, 1, j)) + return SubString(s, 1, prevind(s,j)) end function chomp(s::String) i = endof(s) if i < 1 || codeunit(s,i) != 0x0a SubString(s, 1, i) elseif i < 2 || codeunit(s,i-1) != 0x0d - SubString(s, 1, i-1) + SubString(s, 1, prevind(s, i)) else - SubString(s, 1, i-2) + SubString(s, 1, prevind(s, i-1)) end end diff --git a/test/regex.jl b/test/regex.jl index 6171e5846ffa8..3d9837f1c2486 100644 --- a/test/regex.jl +++ b/test/regex.jl @@ -47,3 +47,6 @@ end # Backcapture reference in substitution string @test replace("abcde", r"(..)(?Pd)", s"\gxy\\\1") == "adxy\\bce" @test_throws ErrorException replace("a", r"(?P)", s"\g") + +# Proper unicode handling +@test match(r"∀∀", "∀x∀∀∀").match == "∀∀" diff --git a/test/strings/basic.jl b/test/strings/basic.jl index 2da583f9474b5..adaa472f713a0 100644 --- a/test/strings/basic.jl +++ b/test/strings/basic.jl @@ -121,12 +121,14 @@ end @test ""[10:9] == "" @test "hello"[10:9] == "" @test "hellø"[10:9] == "" - @test SubString("hello", 1, 6)[10:9] == "" + @test SubString("hello", 1, 5)[10:9] == "" @test SubString("hello", 1, 0)[10:9] == "" - @test SubString("hellø", 1, 6)[10:9] == "" + @test SubString("hellø", 1, 5)[10:9] == "" @test SubString("hellø", 1, 0)[10:9] == "" - @test SubString("", 1, 6)[10:9] == "" @test SubString("", 1, 0)[10:9] == "" + + @test_throws BoundsError SubString("", 1, 6) + @test_throws BoundsError SubString("", 1, 1) end @testset "issue #22500 (using `get()` to index strings with default returns)" begin diff --git a/test/strings/search.jl b/test/strings/search.jl index 0462f9dfe0a5b..251d32ba5b256 100644 --- a/test/strings/search.jl +++ b/test/strings/search.jl @@ -29,7 +29,7 @@ end # @test_throws ErrorException in("foobar","bar") @test_throws BoundsError search(b"\x1\x2",0x1,0) @test rsearchindex(b"foo",b"o",0) == 0 -@test rsearchindex(SubString("",1,1),SubString("",1,1)) == 1 +@test rsearchindex(SubString("",1,0),SubString("",1,0)) == 1 @test search(b"foo",'o') == 2 @test rsearch(b"foo",'o') == 3 diff --git a/test/strings/types.jl b/test/strings/types.jl index 6efab9d92a18d..724817bcda256 100644 --- a/test/strings/types.jl +++ b/test/strings/types.jl @@ -25,25 +25,53 @@ for i1 = 1:length(u8str2) end end +# tests that SubString of a single multibyte `Char` string, like "∀" which takes 3 bytes +# gives the same result as `getindex` (except that it is a veiw not a copy) +for idx in 0:1 + @test SubString("∀", 1, idx) == "∀"[1:idx] +end + +# Substring provided with invalid end index throws BoundsError +@test_throws BoundsError SubString("∀", 1, 2) +@test_throws BoundsError SubString("∀", 1, 3) +@test_throws BoundsError SubString("∀", 1, 4) + +# Substring provided with invalid start index throws BoundsError +@test_throws BoundsError SubString("∀∀", 2:4) + +# tests for SubString of more than one multibyte `Char` string +# we are consistent with `getindex` for `String` +for idx in [0, 1, 4] + @test SubString("∀∀", 1, idx) == "∀∀"[1:idx] + @test SubString("∀∀", 4, idx) == "∀∀"[4:idx] +end + +# second index beyond endof("∀∀") +for idx in 5:8 + @test_throws BoundsError SubString("∀∀", 1, idx) + @test_throws BoundsError SubString("∀∀", 4, idx) +end + let str="tempus fugit" #length(str)==12 - ss=SubString(str,1,length(str)) #match source string + ss=SubString(str,1,endof(str)) #match source string + @test length(ss)==length(str) + + ss=SubString(str,1:endof(str)) @test length(ss)==length(str) ss=SubString(str,1,0) #empty SubString @test length(ss)==0 - ss=SubString(str,14,20) #start indexed beyond source string length + ss=SubString(str,1:0) @test length(ss)==0 - ss=SubString(str,10,16) #end indexed beyond source string length - @test length(ss)==3 + @test_throws BoundsError SubString(str,14,20) #start indexing beyond source string length + @test_throws BoundsError SubString(str,10,16) #end indexing beyond source string length - str2="" - ss=SubString(str2,1,4) #empty source string - @test length(ss)==0 - - ss=SubString(str2,1,1) #empty source string, identical start and end index - @test length(ss)==0 + @test_throws BoundsError SubString("", 1, 4) #empty source string + @test_throws BoundsError SubString("", 1, 1) #empty source string, identical start and end index + @test_throws BoundsError SubString("", 10, 12) + @test SubString("",12,10) == "" end @test SubString("foobar", big(1), big(3)) == "foo" @@ -55,7 +83,7 @@ let str = "aa\u2200\u2222bb" write(b, u) @test String(take!(b)) == "\u2200\u2222" - @test_throws ArgumentError SubString(str, 4, 5) + @test_throws BoundsError SubString(str, 4, 5) @test_throws BoundsError next(u, 0) @test_throws BoundsError next(u, 7) @test_throws BoundsError getindex(u, 0) @@ -68,6 +96,7 @@ let str = "aa\u2200\u2222bb" end let str = "føøbar" + @test_throws BoundsError SubString(str, 10, 10) u = SubString(str, 4, 3) @test length(u) == 0 b = IOBuffer() @@ -75,14 +104,6 @@ let str = "føøbar" @test String(take!(b)) == "" end -let str = "føøbar" - u = SubString(str, 10, 10) - @test length(u) == 0 - b = IOBuffer() - write(b, u) - @test String(take!(b)) == "" -end - # search and SubString (issue #5679) let str = "Hello, world!" u = SubString(str, 1, 5) @@ -91,6 +112,22 @@ let str = "Hello, world!" @test rsearch(u, "ll") == 3:4 end +# SubString created from SubString +let str = "Hello, world!" + u = SubString(str, 2, 5) + for idx in 1:4 + @test SubString(u, 2, idx) == u[2:idx] + @test SubString(u, 2:idx) == u[2:idx] + end + @test_throws BoundsError SubString(u, 1, 10) + @test_throws BoundsError SubString(u, 1:10) + @test_throws BoundsError SubString(u, 20:30) + @test SubString(u, 20:15) == "" + @test_throws BoundsError SubString(u, -1:10) + @test SubString(u, -1, -10) == "" + @test SubString(SubString("123", 1, 2), -10, -20) == "" +end + # sizeof @test sizeof(SubString("abc\u2222def",4,4)) == 3 @@ -115,8 +152,8 @@ let ss, s="lorem ipsum", SubString(s,1,6)=>"lorem ", SubString(s,1,0)=>"", SubString(s,2,4)=>"ore", - SubString(s,2,16)=>"orem ipsum", - SubString(s,12,14)=>"" + SubString(s,2,11)=>"orem ipsum", + SubString(s,15,14)=>"" ) for (ss,s) in sdict local ss @@ -140,9 +177,18 @@ end #let #for isvalid(SubString{String}) let s = "Σx + βz - 2" - for i in -1:(length(s) + 2) - local ss = SubString(s, 1, i) - @test isvalid(ss, i) == isvalid(s, i) + for i in -1:(length(s)+2) + if isvalid(s, i) + ss=SubString(s,1,i) + # make sure isvalid gives equivalent results for SubString and String + @test isvalid(ss,i)==isvalid(s,i) + else + if i > 0 + @test_throws BoundsError SubString(s,1,i) + else + @test SubString(s,1,i) == "" + end + end end end diff --git a/test/strings/util.jl b/test/strings/util.jl index 79918bd76aabd..9defbe27ebd44 100644 --- a/test/strings/util.jl +++ b/test/strings/util.jl @@ -230,7 +230,11 @@ end @testset "chomp/chop" begin @test chomp("foo\n") == "foo" + @test chomp("fo∀\n") == "fo∀" + @test chomp("fo∀") == "fo∀" @test chop("fooε") == "foo" + @test chop("foεo") == "foε" + @test chop("∃∃∃∃") == "∃∃∃" @test isa(chomp("foo"), SubString) @test isa(chop("foo"), SubString) end