From 9d18859caeb4f998f9daf1e9e2252a0cf96a41ad Mon Sep 17 00:00:00 2001 From: Rajesh Vaidheeswarran Date: Sat, 18 Aug 2018 00:23:19 -0400 Subject: [PATCH 1/9] [Issue 28415] Add unique!(f, itr) --- base/set.jl | 29 +++++++++++++++++++++++++++++ test/sets.jl | 3 +++ 2 files changed, 32 insertions(+) diff --git a/base/set.jl b/base/set.jl index a3e9954d21477..82e228d39a800 100644 --- a/base/set.jl +++ b/base/set.jl @@ -158,6 +158,35 @@ function unique(f::Callable, C) out end +""" + unique!(f, itr) + +In-place replace an array containing one value from `itr` for each unique value produced by `f` +applied to elements of `itr`. + +# Examples +```jldoctest +julia> v = Vector([1, -1, 3, -3, 4, -4, 5, -5, 6, -6]) +julia> unique!(x -> x^2, v) +julia> print(v) +[1, 3, 4, 5, 6] +``` +""" +function unique!(f::Callable, C) + out = Vector{eltype(C)}() + seen = Set() + i = 1 + while i <= length(C) + y = f(C[i]) + if !in(y, seen) + push!(seen, y) + i += 1 + else + splice!(C, i) + end + end +end + # If A is not grouped, then we will need to keep track of all of the elements that we have # seen so far. 
function _unique!(A::AbstractVector) diff --git a/test/sets.jl b/test/sets.jl index ea523d43afb72..1b529ff04dbf5 100644 --- a/test/sets.jl +++ b/test/sets.jl @@ -350,6 +350,9 @@ end u = [1,1,3,2,1] unique!(u) @test u == [1,3,2] + u = [5, 1, 8, 9, 3, 4, 10, 7, 2, 6] + unique!(n -> n % 3, u) + @test u == [5, 1, 9] @test unique!([]) == [] @test unique!(Float64[]) == Float64[] u = [1,2,2,3,5,5] From 487cf6378f1a7f94857a8ee6f05240344b951841 Mon Sep 17 00:00:00 2001 From: Rajesh Vaidheeswarran Date: Sat, 18 Aug 2018 10:04:19 -0400 Subject: [PATCH 2/9] [Issue 28415] Remove extraneous variable --- base/set.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/base/set.jl b/base/set.jl index 82e228d39a800..543464445f364 100644 --- a/base/set.jl +++ b/base/set.jl @@ -173,7 +173,6 @@ julia> print(v) ``` """ function unique!(f::Callable, C) - out = Vector{eltype(C)}() seen = Set() i = 1 while i <= length(C) From c546c70366fad14a62fd92fa537342cf08f9f15a Mon Sep 17 00:00:00 2001 From: Rajesh Vaidheeswarran Date: Sat, 18 Aug 2018 12:15:17 -0400 Subject: [PATCH 3/9] [Issue 28415] Crazy, but implementing unique!(f::Callable, itr) as unique(f::Callable, itr) is much faster than the current version. The culprit seems to be splice! 
--- base/set.jl | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/base/set.jl b/base/set.jl index 543464445f364..c0a0155173243 100644 --- a/base/set.jl +++ b/base/set.jl @@ -173,17 +173,9 @@ julia> print(v) ``` """ function unique!(f::Callable, C) - seen = Set() - i = 1 - while i <= length(C) - y = f(C[i]) - if !in(y, seen) - push!(seen, y) - i += 1 - else - splice!(C, i) - end - end + x = unique(f, C) + resize!(C, 1) + splice!(C, 1, x) end # If A is not grouped, then we will need to keep track of all of the elements that we have From d27834d0fbb2b8434c26fcbb0f720c47118dd0c3 Mon Sep 17 00:00:00 2001 From: Rajesh Vaidheeswarran Date: Sat, 18 Aug 2018 14:50:38 -0400 Subject: [PATCH 4/9] [Issue 28415] Remove the Vector(...) wrapper from the doctest because makedocs fails on it --- base/set.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/set.jl b/base/set.jl index c0a0155173243..5998445c4b2fd 100644 --- a/base/set.jl +++ b/base/set.jl @@ -166,7 +166,7 @@ applied to elements of `itr`. # Examples ```jldoctest -julia> v = Vector([1, -1, 3, -3, 4, -4, 5, -5, 6, -6]) +julia> v = [1, -1, 3, -3, 4, -4, 5, -5, 6, -6] julia> unique!(x -> x^2, v) julia> print(v) [1, 3, 4, 5, 6] From 805ce5474ac714e101df3de5771cee48edf2a127 Mon Sep 17 00:00:00 2001 From: Rajesh Vaidheeswarran Date: Sat, 18 Aug 2018 23:36:28 -0400 Subject: [PATCH 5/9] [Issue 28415] Fix documentation to pass jldoctest --- base/set.jl | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/base/set.jl b/base/set.jl index 5998445c4b2fd..9fc8ffdc6ba5a 100644 --- a/base/set.jl +++ b/base/set.jl @@ -166,9 +166,7 @@ applied to elements of `itr`. 
# Examples ```jldoctest -julia> v = [1, -1, 3, -3, 4, -4, 5, -5, 6, -6] -julia> unique!(x -> x^2, v) -julia> print(v) +julia> v = [1, -1, 3, -3, 4, -4, 5, -5, 6, -6]; unique!(x -> x^2, v); print(v) [1, 3, 4, 5, 6] ``` """ From d7210b331b50d4ecdb4959711d98af044efaeb40 Mon Sep 17 00:00:00 2001 From: Rajesh Vaidheeswarran Date: Mon, 20 Aug 2018 22:11:21 -0400 Subject: [PATCH 6/9] [Issue 28415] Update to not use not-in-place function --- base/set.jl | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/base/set.jl b/base/set.jl index 9fc8ffdc6ba5a..b88c9c16bd5a9 100644 --- a/base/set.jl +++ b/base/set.jl @@ -170,10 +170,32 @@ julia> v = [1, -1, 3, -3, 4, -4, 5, -5, 6, -6]; unique!(x -> x^2, v); print(v) [1, 3, 4, 5, 6] ``` """ -function unique!(f::Callable, C) - x = unique(f, C) - resize!(C, 1) - splice!(C, 1, x) +function unique!(f::Callable, A) + seen = Set{eltype(A)}() + l = length(A) + cur = 1 + index = 1 + done = false + tmp = Nothing + while !done + y = f(A[cur]) + if y ∉ seen + push!(seen, y) + if cur != index + tmp = A[index] + A[index] = A[cur] + A[cur] = tmp + end + index += 1 + end + cur += 1 + + if cur > l + done = true + end + end + if cur != index splice!(A, index:cur-1); end + A end # If A is not grouped, then we will need to keep track of all of the elements that we have From f0146c7e6e1b317059083f388840c900ad75ca7d Mon Sep 17 00:00:00 2001 From: Rajesh Vaidheeswarran Date: Mon, 20 Aug 2018 22:24:35 -0400 Subject: [PATCH 7/9] [Issue 28415] Simplify swap --- base/set.jl | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/base/set.jl b/base/set.jl index b88c9c16bd5a9..a50716286b49c 100644 --- a/base/set.jl +++ b/base/set.jl @@ -176,15 +176,12 @@ function unique!(f::Callable, A) cur = 1 index = 1 done = false - tmp = Nothing while !done y = f(A[cur]) if y ∉ seen push!(seen, y) if cur != index - tmp = A[index] - A[index] = A[cur] - A[cur] = tmp + A[cur], A[index] = A[index], A[cur] end index += 1 
end From b5c6ecf6820da4cee760038912ea02fdfb43db94 Mon Sep 17 00:00:00 2001 From: Rajesh Vaidheeswarran Date: Tue, 21 Aug 2018 22:32:10 -0400 Subject: [PATCH 8/9] [Issue 28415] resize! seems to be faster than splice! --- base/set.jl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/base/set.jl b/base/set.jl index a50716286b49c..75ecd99565173 100644 --- a/base/set.jl +++ b/base/set.jl @@ -191,8 +191,7 @@ function unique!(f::Callable, A) done = true end end - if cur != index splice!(A, index:cur-1); end - A + ifelse(cur != index, resize!(A, index-1), A) end # If A is not grouped, then we will need to keep track of all of the elements that we have From 3ac292a160ca95a000ee256872609d35807879f7 Mon Sep 17 00:00:00 2001 From: Rajesh Vaidheeswarran Date: Wed, 22 Aug 2018 10:53:10 -0400 Subject: [PATCH 9/9] [Issue 28415] Use ?: instead of ifelse to avoid unconditional evaluation of all arguments --- base/set.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/set.jl b/base/set.jl index 75ecd99565173..fba2a93bbeb99 100644 --- a/base/set.jl +++ b/base/set.jl @@ -191,7 +191,7 @@ function unique!(f::Callable, A) done = true end end - ifelse(cur != index, resize!(A, index-1), A) + cur != index ? resize!(A, index-1) : A end # If A is not grouped, then we will need to keep track of all of the elements that we have