Skip to content

Commit

Permalink
Only accept valid indices in SubString constructors (#22511)
Browse files Browse the repository at this point in the history
  • Loading branch information
bkamins authored and nalimilan committed Oct 13, 2017
1 parent 42790a8 commit 39f668c
Show file tree
Hide file tree
Showing 10 changed files with 124 additions and 59 deletions.
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -640,6 +640,9 @@ Breaking changes

This section lists changes that do not have deprecation warnings.

* The constructor of `SubString` now checks if the requsted view range
is defined by valid indices in the parent `AbstractString` ([#22511]).

* `readline`, `readlines` and `eachline` return lines without line endings by default.
You *must* use `readline(s, chomp=false)`, etc. to get the old behavior where
returned lines include trailing end-of-line character(s) ([#19944]).
Expand Down
7 changes: 4 additions & 3 deletions base/regex.jl
Original file line number Diff line number Diff line change
Expand Up @@ -212,9 +212,10 @@ function match(re::Regex, str::Union{SubString{String}, String}, idx::Integer, a
end
ovec = re.ovec
n = div(length(ovec),2) - 1
mat = SubString(str, ovec[1]+1, ovec[2])
cap = Union{Void,SubString{String}}[
ovec[2i+1] == PCRE.UNSET ? nothing : SubString(str, ovec[2i+1]+1, ovec[2i+2]) for i=1:n ]
mat = SubString(str, ovec[1]+1, prevind(str, ovec[2]+1))
cap = Union{Void,SubString{String}}[ovec[2i+1] == PCRE.UNSET ? nothing :
SubString(str, ovec[2i+1]+1,
prevind(str, ovec[2i+2]+1)) for i=1:n]
off = Int[ ovec[2i+1]+1 for i=1:n ]
RegexMatch(mat, cap, ovec[1]+1, off, re)
end
Expand Down
12 changes: 6 additions & 6 deletions base/shell.jl
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ function shell_parse(str::AbstractString, interpolate::Bool=true;
while !done(s,j)
c, k = next(s,j)
if !in_single_quotes && !in_double_quotes && isspace(c)
update_arg(s[i:j-1])
update_arg(s[i:prevind(s, j)])
append_arg()
j = k
while !done(s,j)
Expand All @@ -65,7 +65,7 @@ function shell_parse(str::AbstractString, interpolate::Bool=true;
j = k
end
elseif interpolate && !in_single_quotes && c == '$'
update_arg(s[i:j-1]); i = k; j = k
update_arg(s[i:prevind(s, j)]); i = k; j = k
if done(s,k)
error("\$ right before end of command")
end
Expand All @@ -79,24 +79,24 @@ function shell_parse(str::AbstractString, interpolate::Bool=true;
else
if !in_double_quotes && c == '\''
in_single_quotes = !in_single_quotes
update_arg(s[i:j-1]); i = k
update_arg(s[i:prevind(s, j)]); i = k
elseif !in_single_quotes && c == '"'
in_double_quotes = !in_double_quotes
update_arg(s[i:j-1]); i = k
update_arg(s[i:prevind(s, j)]); i = k
elseif c == '\\'
if in_double_quotes
if done(s,k)
error("unterminated double quote")
end
if s[k] == '"' || s[k] == '$' || s[k] == '\\'
update_arg(s[i:j-1]); i = k
update_arg(s[i:prevind(s, j)]); i = k
c, k = next(s,k)
end
elseif !in_single_quotes
if done(s,k)
error("dangling backslash")
end
update_arg(s[i:j-1]); i = k
update_arg(s[i:prevind(s, j)]); i = k
c, k = next(s,k)
end
elseif !in_single_quotes && !in_double_quotes && c in special
Expand Down
40 changes: 23 additions & 17 deletions base/strings/types.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,32 +4,38 @@

## substrings reference original strings ##

"""
SubString(s::AbstractString, i::Integer, j::Integer=endof(s))
SubString(s::AbstractString, r::UnitRange{<:Integer})
Like [`getindex`](@ref), but returns a view into the parent string `s`
within range `i:j` or `r` respectively instead of making a copy.
"""
struct SubString{T<:AbstractString} <: AbstractString
string::T
offset::Int
endof::Int

function SubString{T}(s::T, i::Int, j::Int) where T<:AbstractString
if i > endof(s) || j<i
return new(s, i-1, 0)
else
if !isvalid(s,i)
throw(ArgumentError("invalid SubString index"))
end

while !isvalid(s,j) && j > i
j -= 1
end

o = i-1
new(s, o, max(0, j-o))
end
i > j && return new(s, i - 1, 0) # always allow i > j as it is consistent with getindex
isvalid(s, i) || throw(BoundsError(s, i))
isvalid(s, j) || throw(BoundsError(s, j))
new(s, i-1, j-i+1)
end
end

SubString(s::T, i::Int, j::Int) where {T<:AbstractString} = SubString{T}(s, i, j)
SubString(s::SubString, i::Int, j::Int) = SubString(s.string, s.offset+i, s.offset+j)
SubString(s::AbstractString, i::Integer, j::Integer) = SubString(s, Int(i), Int(j))
SubString(s::AbstractString, i::Integer) = SubString(s, i, endof(s))
SubString(s::AbstractString, i::Integer, j::Integer=endof(s)) = SubString(s, Int(i), Int(j))
SubString(s::AbstractString, r::UnitRange{<:Integer}) = SubString(s, first(r), last(r))

function SubString(s::SubString, i::Int, j::Int)
# always allow i > j as it is consistent with getindex
i > j && return SubString(s.string, s.offset + i, s.offset + j)
i >= 1 || throw(BoundsError(s, i))
j <= endof(s) || throw(BoundsError(s, j))
SubString(s.string, s.offset + i, s.offset + j)
end

SubString(s::AbstractString) = SubString(s, 1, endof(s))
SubString{T}(s::T) where {T<:AbstractString} = SubString{T}(s, 1, endof(s))

Expand Down
10 changes: 5 additions & 5 deletions base/strings/util.jl
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ julia> chop(a)
"Marc"
```
"""
chop(s::AbstractString) = SubString(s, 1, endof(s)-1)
chop(s::AbstractString) = SubString(s, 1, prevind(s, endof(s)))

"""
chomp(s::AbstractString)
Expand All @@ -96,17 +96,17 @@ function chomp(s::AbstractString)
i = endof(s)
(i < 1 || s[i] != '\n') && (return SubString(s, 1, i))
j = prevind(s,i)
(j < 1 || s[j] != '\r') && (return SubString(s, 1, i-1))
return SubString(s, 1, j-1)
(j < 1 || s[j] != '\r') && (return SubString(s, 1, j))
return SubString(s, 1, prevind(s,j))
end
function chomp(s::String)
i = endof(s)
if i < 1 || codeunit(s,i) != 0x0a
SubString(s, 1, i)
elseif i < 2 || codeunit(s,i-1) != 0x0d
SubString(s, 1, i-1)
SubString(s, 1, prevind(s, i))
else
SubString(s, 1, i-2)
SubString(s, 1, prevind(s, i-1))
end
end

Expand Down
3 changes: 3 additions & 0 deletions test/regex.jl
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,6 @@ end
# Backcapture reference in substitution string
@test replace("abcde", r"(..)(?P<byname>d)", s"\g<byname>xy\\\1") == "adxy\\bce"
@test_throws ErrorException replace("a", r"(?P<x>)", s"\g<y>")

# Proper unicode handling
@test match(r"∀∀", "∀x∀∀∀").match == "∀∀"
8 changes: 5 additions & 3 deletions test/strings/basic.jl
Original file line number Diff line number Diff line change
Expand Up @@ -121,12 +121,14 @@ end
@test ""[10:9] == ""
@test "hello"[10:9] == ""
@test "hellø"[10:9] == ""
@test SubString("hello", 1, 6)[10:9] == ""
@test SubString("hello", 1, 5)[10:9] == ""
@test SubString("hello", 1, 0)[10:9] == ""
@test SubString("hellø", 1, 6)[10:9] == ""
@test SubString("hellø", 1, 5)[10:9] == ""
@test SubString("hellø", 1, 0)[10:9] == ""
@test SubString("", 1, 6)[10:9] == ""
@test SubString("", 1, 0)[10:9] == ""

@test_throws BoundsError SubString("", 1, 6)
@test_throws BoundsError SubString("", 1, 1)
end

@testset "issue #22500 (using `get()` to index strings with default returns)" begin
Expand Down
2 changes: 1 addition & 1 deletion test/strings/search.jl
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ end
# @test_throws ErrorException in("foobar","bar")
@test_throws BoundsError search(b"\x1\x2",0x1,0)
@test rsearchindex(b"foo",b"o",0) == 0
@test rsearchindex(SubString("",1,1),SubString("",1,1)) == 1
@test rsearchindex(SubString("",1,0),SubString("",1,0)) == 1

@test search(b"foo",'o') == 2
@test rsearch(b"foo",'o') == 3
Expand Down
94 changes: 70 additions & 24 deletions test/strings/types.jl
Original file line number Diff line number Diff line change
Expand Up @@ -25,25 +25,53 @@ for i1 = 1:length(u8str2)
end
end

# tests that SubString of a single multibyte `Char` string, like "∀" which takes 3 bytes
# gives the same result as `getindex` (except that it is a veiw not a copy)
for idx in 0:1
@test SubString("", 1, idx) == ""[1:idx]
end

# Substring provided with invalid end index throws BoundsError
@test_throws BoundsError SubString("", 1, 2)
@test_throws BoundsError SubString("", 1, 3)
@test_throws BoundsError SubString("", 1, 4)

# Substring provided with invalid start index throws BoundsError
@test_throws BoundsError SubString("∀∀", 2:4)

# tests for SubString of more than one multibyte `Char` string
# we are consistent with `getindex` for `String`
for idx in [0, 1, 4]
@test SubString("∀∀", 1, idx) == "∀∀"[1:idx]
@test SubString("∀∀", 4, idx) == "∀∀"[4:idx]
end

# second index beyond endof("∀∀")
for idx in 5:8
@test_throws BoundsError SubString("∀∀", 1, idx)
@test_throws BoundsError SubString("∀∀", 4, idx)
end

let str="tempus fugit" #length(str)==12
ss=SubString(str,1,length(str)) #match source string
ss=SubString(str,1,endof(str)) #match source string
@test length(ss)==length(str)

ss=SubString(str,1:endof(str))
@test length(ss)==length(str)

ss=SubString(str,1,0) #empty SubString
@test length(ss)==0

ss=SubString(str,14,20) #start indexed beyond source string length
ss=SubString(str,1:0)
@test length(ss)==0

ss=SubString(str,10,16) #end indexed beyond source string length
@test length(ss)==3
@test_throws BoundsError SubString(str,14,20) #start indexing beyond source string length
@test_throws BoundsError SubString(str,10,16) #end indexing beyond source string length

str2=""
ss=SubString(str2,1,4) #empty source string
@test length(ss)==0

ss=SubString(str2,1,1) #empty source string, identical start and end index
@test length(ss)==0
@test_throws BoundsError SubString("", 1, 4) #empty source string
@test_throws BoundsError SubString("", 1, 1) #empty source string, identical start and end index
@test_throws BoundsError SubString("", 10, 12)
@test SubString("",12,10) == ""
end

@test SubString("foobar", big(1), big(3)) == "foo"
Expand All @@ -55,7 +83,7 @@ let str = "aa\u2200\u2222bb"
write(b, u)
@test String(take!(b)) == "\u2200\u2222"

@test_throws ArgumentError SubString(str, 4, 5)
@test_throws BoundsError SubString(str, 4, 5)
@test_throws BoundsError next(u, 0)
@test_throws BoundsError next(u, 7)
@test_throws BoundsError getindex(u, 0)
Expand All @@ -68,21 +96,14 @@ let str = "aa\u2200\u2222bb"
end

let str = "føøbar"
@test_throws BoundsError SubString(str, 10, 10)
u = SubString(str, 4, 3)
@test length(u) == 0
b = IOBuffer()
write(b, u)
@test String(take!(b)) == ""
end

let str = "føøbar"
u = SubString(str, 10, 10)
@test length(u) == 0
b = IOBuffer()
write(b, u)
@test String(take!(b)) == ""
end

# search and SubString (issue #5679)
let str = "Hello, world!"
u = SubString(str, 1, 5)
Expand All @@ -91,6 +112,22 @@ let str = "Hello, world!"
@test rsearch(u, "ll") == 3:4
end

# SubString created from SubString
let str = "Hello, world!"
u = SubString(str, 2, 5)
for idx in 1:4
@test SubString(u, 2, idx) == u[2:idx]
@test SubString(u, 2:idx) == u[2:idx]
end
@test_throws BoundsError SubString(u, 1, 10)
@test_throws BoundsError SubString(u, 1:10)
@test_throws BoundsError SubString(u, 20:30)
@test SubString(u, 20:15) == ""
@test_throws BoundsError SubString(u, -1:10)
@test SubString(u, -1, -10) == ""
@test SubString(SubString("123", 1, 2), -10, -20) == ""
end

# sizeof
@test sizeof(SubString("abc\u2222def",4,4)) == 3

Expand All @@ -115,8 +152,8 @@ let ss, s="lorem ipsum",
SubString(s,1,6)=>"lorem ",
SubString(s,1,0)=>"",
SubString(s,2,4)=>"ore",
SubString(s,2,16)=>"orem ipsum",
SubString(s,12,14)=>""
SubString(s,2,11)=>"orem ipsum",
SubString(s,15,14)=>""
)
for (ss,s) in sdict
local ss
Expand All @@ -140,9 +177,18 @@ end #let

#for isvalid(SubString{String})
let s = "Σx + βz - 2"
for i in -1:(length(s) + 2)
local ss = SubString(s, 1, i)
@test isvalid(ss, i) == isvalid(s, i)
for i in -1:(length(s)+2)
if isvalid(s, i)
ss=SubString(s,1,i)
# make sure isvalid gives equivalent results for SubString and String
@test isvalid(ss,i)==isvalid(s,i)
else
if i > 0
@test_throws BoundsError SubString(s,1,i)
else
@test SubString(s,1,i) == ""
end
end
end
end

Expand Down
4 changes: 4 additions & 0 deletions test/strings/util.jl
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,11 @@ end

@testset "chomp/chop" begin
@test chomp("foo\n") == "foo"
@test chomp("fo∀\n") == "fo∀"
@test chomp("fo∀") == "fo∀"
@test chop("fooε") == "foo"
@test chop("foεo") == "foε"
@test chop("∃∃∃∃") == "∃∃∃"
@test isa(chomp("foo"), SubString)
@test isa(chop("foo"), SubString)
end
Expand Down

0 comments on commit 39f668c

Please sign in to comment.