Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add replace & replace! for collections #22324

Merged
merged 1 commit into from
Dec 22, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -475,6 +475,10 @@ Library improvements
defined, linear-algebra function `transpose`. Similarly,
`permutedims(v::AbstractVector)` will create a row matrix ([#24839]).

* A new `replace(A, old=>new)` function is introduced to replace `old` by `new` in
collection `A`. There are also two other methods taking a function as first argument,
`replace(pred, A, new)` and `replace(prednew, A)`, and a mutating variant, `replace!` ([#22324]).

* `CartesianRange` changes ([#24715]):
- Inherits from `AbstractArray`, and linear indexing can be used to provide
linear-to-cartesian conversion ([#24715])
Expand Down
1 change: 1 addition & 0 deletions base/exports.jl
Original file line number Diff line number Diff line change
Expand Up @@ -748,6 +748,7 @@ export
randstring,
repeat,
replace,
replace!,
repr,
reverseind,
rpad,
Expand Down
238 changes: 238 additions & 0 deletions base/set.jl
Original file line number Diff line number Diff line change
Expand Up @@ -544,3 +544,241 @@ end

convert(::Type{T}, s::T) where {T<:AbstractSet} = s
convert(::Type{T}, s::AbstractSet) where {T<:AbstractSet} = T(s)


## replace/replace! ##

"""
replace!(A, old_new::Pair...; [count::Integer])

For each pair `old=>new` in `old_new`, replace all occurrences
of `old` in collection `A` by `new`.
If `count` is specified, then replace at most `count` occurrences in total.
See also [`replace`](@ref replace(A, old_new::Pair...)).

# Examples
```jldoctest
julia> replace!([1, 2, 1, 3], 1=>0, 2=>4, count=2)
4-element Array{Int64,1}:
0
4
1
3

julia> replace!(Set([1, 2, 3]), 1=>0)
Set([0, 2, 3])
```
"""
replace!(A, old_new::Pair...; count::Integer=typemax(Int)) = _replace!(A, eltype(A), count, old_new)

# Internal worker shared by `replace` and `replace!` for the `old=>new` API.
# The element type is received as the type parameter `K` rather than being
# computed via `eltype(A)` where `Some{K}` is built, because using `eltype(A)`
# directly as the parameter of `Some` degrades performance.
function _replace!(A, ::Type{K}, count::Integer, old_new::Tuple{Vararg{Pair}}) where K
    # Return the replacement for `x` (wrapped in `Some{K}`) taken from the first
    # pair whose `old` side equals `x`, or `nothing` when no pair matches.
    @inline function lookup(x)
        for entry in old_new
            if first(entry) == x
                return Some{K}(last(entry))
            end
        end
        return nothing
    end
    return replace!(lookup, A, count=count)
end

"""
replace!(pred::Function, A, new; [count::Integer])

Replace all occurrences `x` in collection `A` for which `pred(x)` is true
by `new`.

# Examples
```jldoctest
julia> A = [1, 2, 3, 1];

julia> replace!(isodd, A, 0, count=2)
4-element Array{Int64,1}:
0
2
0
1
```
"""
replace!(pred::Callable, A, new; count::Integer=typemax(Int)) =
replace!(x -> if pred(x) Some(new) end, A, count=count)

"""
replace!(prednew::Function, A; [count::Integer])

For each value `x` in `A`, `prednew(x)` is called and must
return either `nothing`, in which case no replacement occurs,
or a value, possibly wrapped as a [`Some`](@ref) object, which
will be used as a replacement for `x`.

# Examples
```jldoctest
julia> replace!(x -> isodd(x) ? 2x : nothing, [1, 2, 3, 4])
4-element Array{Int64,1}:
2
2
6
4

julia> replace!(Union{Int,Nothing}[0, 1, 2, nothing, 4], count=2) do x
x !== nothing && iseven(x) ? Some(nothing) : nothing
end
5-element Array{Union{Nothing,Int64},1}:
nothing
1
nothing
nothing
4

julia> replace!(Dict(1=>2, 3=>4)) do kv
if first(kv) < 3; first(kv)=>3 end
end
Dict{Int64,Int64} with 2 entries:
3 => 4
1 => 3

julia> replace!(x->2x, Set([3, 6]))
Set([6, 12])
```
"""
replace!(prednew::Callable, A; count::Integer=typemax(Int)) =
replace!(prednew, A, count=clamp(count, typemin(Int), typemax(Int)) % Int)



"""
replace(A, old_new::Pair...; [count::Integer])

Return a copy of collection `A` where, for each pair `old=>new` in `old_new`,
all occurrences of `old` are replaced by `new`.
If `count` is specified, then replace at most `count` occurrences in total.
See also [`replace!`](@ref).

# Examples
```jldoctest
julia> replace([1, 2, 1, 3], 1=>0, 2=>4, count=2)
4-element Array{Int64,1}:
0
4
1
3
```
"""
replace(A, old_new::Pair...; count::Integer=typemax(Int)) =
_replace!(copy(A), eltype(A), count, old_new)

"""
replace(pred::Function, A, new; [count::Integer])

Return a copy of collection `A` where all occurrences `x` for which
`pred(x)` is true are replaced by `new`.

# Examples
```jldoctest
julia> replace(isodd, [1, 2, 3, 1], 0, count=2)
4-element Array{Int64,1}:
0
2
0
1
```
"""
replace(pred::Callable, A, new; count::Integer=typemax(Int)) =
replace!(x -> if pred(x) Some(new) end, copy(A), count=count)

"""
replace(prednew::Function, A; [count::Integer])

Return a copy of `A` where for each value `x` in `A`, `prednew(x)` is called
and must return either `nothing`, in which case no replacement occurs,
or a value, possibly wrapped as a [`Some`](@ref) object, which
will be used as a replacement for `x`.

# Examples
```jldoctest
julia> replace(x -> isodd(x) ? 2x : nothing, [1, 2, 3, 4])
4-element Array{Int64,1}:
2
2
6
4

julia> replace(Union{Int,Nothing}[0, 1, 2, nothing, 4], count=2) do x
x !== nothing && iseven(x) ? Some(nothing) : nothing
end
5-element Array{Union{Nothing,Int64},1}:
nothing
1
nothing
nothing
4

julia> replace(Dict(1=>2, 3=>4)) do kv
if first(kv) < 3; first(kv)=>3 end
end
Dict{Int64,Int64} with 2 entries:
3 => 4
1 => 3
```
"""
replace(prednew::Callable, A; count::Integer=typemax(Int)) = replace!(prednew, copy(A), count=count)

# Handle ambiguities: these argument combinations would otherwise match several
# of the methods above equally well, so each one explicitly throws the
# `MethodError` a caller would expect for an unsupported call.
function replace!(a::Callable, b::Pair; count::Integer=-1)
    throw(MethodError(replace!, (a, b)))
end

function replace!(a::Callable, b::Pair, c::Pair; count::Integer=-1)
    throw(MethodError(replace!, (a, b, c)))
end

function replace(a::Callable, b::Pair; count::Integer=-1)
    throw(MethodError(replace, (a, b)))
end

function replace(a::Callable, b::Pair, c::Pair; count::Integer=-1)
    throw(MethodError(replace, (a, b, c)))
end

function replace(a::AbstractString, b::Pair, c::Pair)
    throw(MethodError(replace, (a, b, c)))
end


### replace! for AbstractDict/AbstractSet

# Value to hand to `pop!` in order to remove element `k` from the container:
# the key of the pair for a dictionary, the element itself for a set.
askey(k, ::AbstractDict) = k.first
askey(k, ::AbstractSet) = k

# Record the replacement `x => y.value` in `repl` and report (as `true`) that a
# replacement was requested.
function _replace_update_dict!(repl::Vector{<:Pair}, x, y::Some)
    push!(repl, x => y.value)
    true
end

# `nothing` means "keep `x`": nothing is recorded and `false` is returned.
_replace_update_dict!(repl::Vector{<:Pair}, x, ::Nothing) = false
# A bare value is treated exactly like the same value wrapped in `Some`.
_replace_update_dict!(repl::Vector{<:Pair}, x, y) = _replace_update_dict!(repl, x, Some(y))

# `replace!` for dictionaries and sets.
#
# `prednew` is called on the elements of `A` (pairs for a dictionary) until
# `count` replacements have been collected; the replacements are then applied.
# `count` may be any `Integer`, matching the signature of the generic entry
# points, which forward their `count::Integer` keyword unchanged.
# Throws a `DomainError` if `count` is negative. Returns `A`.
function replace!(prednew::Callable, A::Union{AbstractDict,AbstractSet};
                  count::Integer=typemax(Int))
    count < 0 && throw(DomainError(count, "`count` must not be negative"))
    count == 0 && return A
    # Replacements are collected first, so that `A` is not modified while it is
    # being iterated.
    repl = Pair{eltype(A),eltype(A)}[]
    c = 0
    for x in A
        c += _replace_update_dict!(repl, x, prednew(x))
        c == count && break
    end
    # All deletions are performed before any insertion (as requested in review:
    # this is more predictable, and basic benchmarks showed no overhead compared
    # to a single pass). A new element that collides with a not-yet-removed old
    # one, as in `replace!(x->2x, Set([3, 6]))`, is therefore not lost.
    # Splitting `repl` into separate old/new lists was also suggested in review
    # for non-isbits pairs; a benchmark with strings showed no difference, so it
    # is left for later investigation.
    for oldnew in repl
        pop!(A, askey(first(oldnew), A))
    end
    for oldnew in repl
        push!(A, last(oldnew))
    end
    A
end

### AbstractArray

# Store the replacement `y.value` at index `i` of `A` and report (as `true`)
# that a replacement took place. `i` is whatever `eachindex(A)` produced, which
# may be a linear index or a `CartesianIndex`, so it is not restricted to
# `Integer`.
function _replace_update!(A::AbstractArray, i, y::Some)
    # `i` comes from `eachindex(A)` in the caller, hence it is in bounds.
    @inbounds A[i] = y.value
    true
end

# `nothing` means "keep the element": nothing is written and `false` is returned.
_replace_update!(A::AbstractArray, i, ::Nothing) = false
# A bare value is treated exactly like the same value wrapped in `Some`.
_replace_update!(A::AbstractArray, i, y) = _replace_update!(A, i, Some(y))

# `replace!` for arrays.
#
# `prednew` is called on the elements of `A` in `eachindex` order, and each
# element for which it requests a replacement is overwritten in place, until
# `count` replacements have been made. `count` may be any `Integer`, matching
# the signature of the generic entry points, which forward their
# `count::Integer` keyword unchanged.
# Throws a `DomainError` if `count` is negative. Returns `A`.
function replace!(prednew::Callable, A::AbstractArray; count::Integer=typemax(Int))
    count < 0 && throw(DomainError(count, "`count` must not be negative"))
    count == 0 && return A
    c = 0
    for i in eachindex(A)
        c += _replace_update!(A, i, prednew(A[i]))
        # TODO (from review): a dedicated loop without this `break` for the
        # default `count == typemax(Int)` might allow SIMD for very simple
        # functions; left for future improvement after benchmarking.
        c == count && break
    end
    A
end
6 changes: 3 additions & 3 deletions base/sysimg.jl
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,9 @@ if !isdefined(Core, :Inference)
Core.atdoc!(CoreDocs.docm)
end

# Some type
include("some.jl")

include("dict.jl")
include("set.jl")
include("iterators.jl")
Expand Down Expand Up @@ -255,9 +258,6 @@ include("multidimensional.jl")
include("permuteddimsarray.jl")
using .PermutedDimsArrays

# Some type
include("some.jl")

include("broadcast.jl")
using .Broadcast

Expand Down
4 changes: 4 additions & 0 deletions doc/src/stdlib/collections.md
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,10 @@ Base.collect(::Type, ::Any)
Base.issubset(::Any, ::Any)
Base.filter
Base.filter!
Base.replace(::Any, ::Pair...)
Base.replace(::Base.Callable, ::Any, ::Any)
Base.replace(::Base.Callable, ::Any)
Base.replace!
```

## Indexable Collections
Expand Down
2 changes: 1 addition & 1 deletion doc/src/stdlib/strings.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ Base.searchindex
Base.rsearchindex
Base.contains(::AbstractString, ::AbstractString)
Base.reverse(::Union{String,SubString{String}})
Base.replace
Base.replace(s::AbstractString, pat, f)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This old method is inconsistent with the new ones. Maybe deprecate it in favor of pat => f?

Copy link
Member Author

@rfourquet rfourquet Dec 18, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done in #25165 :)

Base.split
Base.rsplit
Base.strip
Expand Down
53 changes: 53 additions & 0 deletions test/sets.jl
Original file line number Diff line number Diff line change
Expand Up @@ -471,3 +471,56 @@ end
end
end
end

# Tests for `replace` and `replace!` on arrays, dictionaries and sets.
# The assertions below are order-dependent: `a`, `d` and `s` are mutated in
# place by the `replace!` calls and later assertions rely on the updated state.
@testset "replace! & replace" begin
# `maybe1` wraps the replacement in `Some`, `maybe2` returns it bare; both
# evaluate to `nothing` when `p` is false. Running the loop with each of them
# checks both return conventions accepted from a `prednew` function.
maybe1(v, p) = if p Some(v) end
maybe2(v, p) = if p v end

for maybe = (maybe1, maybe2)
# Arrays: the non-mutating call leaves `a` untouched, and the mutating call
# returns the very same array.
a = [1, 2, 3, 1]
@test replace(x->maybe(2x, iseven(x)), a) == [1, 4, 3, 1]
@test replace!(x->maybe(2x, iseven(x)), a) === a
@test a == [1, 4, 3, 1]
@test replace(a, 1=>0) == [0, 4, 3, 0]
@test replace(a, 1=>0, count=1) == [0, 4, 3, 1]
@test replace!(a, 1=>2) === a
@test a == [2, 4, 3, 2]

# Dictionaries: the elements handled are `key=>value` pairs.
d = Dict(1=>2, 3=>4)
@test replace(x->x.first > 2, d, 0=>0) == Dict(1=>2, 0=>0)
@test replace!(x->maybe(x.first=>2*x.second, x.first > 2), d) === d
@test d == Dict(1=>2, 3=>8)
@test replace(d, (3=>8)=>(0=>0)) == Dict(1=>2, 0=>0)
@test replace!(d, (3=>8)=>(2=>2)) === d
@test d == Dict(1=>2, 2=>2)
# With `count=1` either of the two matching entries may be the one replaced,
# so both possible results are accepted.
@test replace(x->x.second == 2, d, 0=>0, count=1) in [Dict(1=>2, 0=>0),
Dict(2=>2, 0=>0)]

# Sets: as above, with `count=1` either matching element may be replaced.
s = Set([1, 2, 3])
@test replace(x->maybe(2x, x>1), s) == Set([1, 4, 6])
@test replace(x->maybe(2x, x>1), s, count=1) in [Set([1, 4, 3]), Set([1, 2, 6])]
@test replace(s, 1=>4) == Set([2, 3, 4])
@test replace!(s, 1=>2) === s
@test s == Set([2, 3])

@test replace([1, 2], 1=>0, 2=>0, count=0) == [1, 2] # count=0 --> no replacements
end
# test collisions with AbstractSet/AbstractDict
# (a replacement value/key coincides with an element that is itself replaced)
@test replace!(x->2x, Set([3, 6])) == Set([6, 12])
@test replace!(x->2x, Set([1:20;])) == Set([2:2:40;])
@test replace!(kv -> (2kv[1] => kv[2]), Dict(1=>2, 2=>4, 4=>8, 8=>16)) == Dict(2=>2, 4=>4, 8=>8, 16=>16)
# test Some(nothing)
# `Some(nothing)` requests replacement by `nothing`, whereas a bare `nothing`
# means "no replacement".

a = [1, 2, nothing, 4]
@test replace(x -> x === nothing ? 0 : Some(nothing), a) == [nothing, nothing, 0, nothing]
@test replace(x -> x === nothing ? 0 : nothing, a) == [1, 2, 0, 4]
@test replace!(x -> x !== nothing ? Some(nothing) : nothing, a) == [nothing, nothing, nothing, nothing]
@test replace(iseven, Any[1, 2, 3, 4], nothing) == [1, nothing, 3, nothing]
@test replace(Any[1, 2, 3, 4], 1=>nothing, 3=>nothing) == [nothing, 2, nothing, 4]
s = Set([1, 2, nothing, 4])
@test replace(x -> x === nothing ? 0 : Some(nothing), s) == Set([0, nothing])
@test replace(x -> x === nothing ? 0 : nothing, s) == Set([1, 2, 0, 4])
@test replace(x -> x !== nothing ? Some(nothing) : nothing, s) == Set([nothing])
@test replace(iseven, Set(Any[1, 2, 3, 4]), nothing) == Set([1, nothing, 3, nothing])
@test replace(Set(Any[1, 2, 3, 4]), 1=>nothing, 3=>nothing) == Set([nothing, 2, nothing, 4])
end