From 6fa8436c2f342cdb5670ff2a3b376d778025571a Mon Sep 17 00:00:00 2001
From: Sacha Verweij <sacha@stanford.edu>
Date: Mon, 25 Jan 2016 16:10:50 -0800
Subject: [PATCH] Replace the LGPL-licensed sparse() parent method with an
 MIT-licensed version. See #13001 and #14631.

---
 base/sparse/csparse.jl      | 124 ---------------------
 base/sparse/sparsematrix.jl | 214 +++++++++++++++++++++++++++++++++++-
 test/sparsedir/sparse.jl    |  13 ++-
 3 files changed, 224 insertions(+), 127 deletions(-)

diff --git a/base/sparse/csparse.jl b/base/sparse/csparse.jl
index a9b251eea5b5bf..cc3b324117d6e3 100644
--- a/base/sparse/csparse.jl
+++ b/base/sparse/csparse.jl
@@ -13,130 +13,6 @@
 # Section 2.4: Triplet form
 # http://www.cise.ufl.edu/research/sparse/CSparse/
 
-"""
-    sparse(I,J,V,[m,n,combine])
-
-Create a sparse matrix `S` of dimensions `m x n` such that `S[I[k], J[k]] = V[k]`.
-The `combine` function is used to combine duplicates. If `m` and `n` are not
-specified, they are set to `maximum(I)` and `maximum(J)` respectively. If the
-`combine` function is not supplied, duplicates are added by default. All elements
-of `I` must satisfy `1 <= I[k] <= m`, and all elements of `J` must satisfy `1 <= J[k] <= n`.
-"""
-function sparse{Tv,Ti<:Integer}(I::AbstractVector{Ti},
-                                J::AbstractVector{Ti},
-                                V::AbstractVector{Tv},
-                                nrow::Integer, ncol::Integer,
-                                combine::Union{Function,Base.Func})
-
-    N = length(I)
-    if N != length(J) || N != length(V)
-        throw(ArgumentError("triplet I,J,V vectors must be the same length"))
-    end
-    if N == 0
-        return spzeros(eltype(V), Ti, nrow, ncol)
-    end
-
-    # Work array
-    Wj = Array(Ti, max(nrow,ncol)+1)
-
-    # Allocate sparse matrix data structure
-    # Count entries in each row
-    Rnz = zeros(Ti, nrow+1)
-    Rnz[1] = 1
-    nz = 0
-    for k=1:N
-        iind = I[k]
-        iind > 0 || throw(ArgumentError("all I index values must be > 0"))
-        iind <= nrow || throw(ArgumentError("all I index values must be ≤ the number of rows"))
-        if V[k] != 0
-            Rnz[iind+1] += 1
-            nz += 1
-        end
-    end
-    Rp = cumsum(Rnz)
-    Ri = Array(Ti, nz)
-    Rx = Array(Tv, nz)
-
-    # Construct row form
-    # place triplet (i,j,x) in column i of R
-    # Use work array for temporary row pointers
-    @simd for i=1:nrow; @inbounds Wj[i] = Rp[i]; end
-
-    @inbounds for k=1:N
-        iind = I[k]
-        jind = J[k]
-        jind > 0 || throw(ArgumentError("all J index values must be > 0"))
-        jind <= ncol || throw(ArgumentError("all J index values must be ≤ the number of columns"))
-        p = Wj[iind]
-        Vk = V[k]
-        if Vk != 0
-            Wj[iind] += 1
-            Rx[p] = Vk
-            Ri[p] = jind
-        end
-    end
-
-    # Reset work array for use in counting duplicates
-    @simd for j=1:ncol; @inbounds Wj[j] = 0; end
-
-    # Sum up duplicates and squeeze
-    anz = 0
-    @inbounds for i=1:nrow
-        p1 = Rp[i]
-        p2 = Rp[i+1] - 1
-        pdest = p1
-
-        for p = p1:p2
-            j = Ri[p]
-            pj = Wj[j]
-            if pj >= p1
-                Rx[pj] = combine(Rx[pj], Rx[p])
-            else
-                Wj[j] = pdest
-                if pdest != p
-                    Ri[pdest] = j
-                    Rx[pdest] = Rx[p]
-                end
-                pdest += one(Ti)
-            end
-        end
-
-        Rnz[i] = pdest - p1
-        anz += (pdest - p1)
-    end
-
-    # Transpose from row format to get the CSC format
-    RiT = Array(Ti, anz)
-    RxT = Array(Tv, anz)
-
-    # Reset work array to build the final colptr
-    Wj[1] = 1
-    @simd for i=2:(ncol+1); @inbounds Wj[i] = 0; end
-    @inbounds for j = 1:nrow
-        p1 = Rp[j]
-        p2 = p1 + Rnz[j] - 1
-        for p = p1:p2
-            Wj[Ri[p]+1] += 1
-        end
-    end
-    RpT = cumsum(Wj[1:(ncol+1)])
-
-    # Transpose
-    @simd for i=1:length(RpT); @inbounds Wj[i] = RpT[i]; end
-    @inbounds for j = 1:nrow
-        p1 = Rp[j]
-        p2 = p1 + Rnz[j] - 1
-        for p = p1:p2
-            ind = Ri[p]
-            q = Wj[ind]
-            Wj[ind] += 1
-            RiT[q] = j
-            RxT[q] = Rx[p]
-        end
-    end
-
-    return SparseMatrixCSC(nrow, ncol, RpT, RiT, RxT)
-end
 
 # Compute the elimination tree of A using triu(A) returning the parent vector.
 # A root node is indicated by 0. This tree may actually be a forest in that
diff --git a/base/sparse/sparsematrix.jl b/base/sparse/sparsematrix.jl
index b4b976d4b89b6a..47bc1a8e2870cd 100644
--- a/base/sparse/sparsematrix.jl
+++ b/base/sparse/sparsematrix.jl
@@ -318,7 +318,219 @@ function sparse_IJ_sorted!{Ti<:Integer}(I::AbstractVector{Ti}, J::AbstractVector
     return SparseMatrixCSC(m, n, colptr, I, V)
 end
 
-## sparse() can take its inputs in unsorted order (the parent method is now in csparse.jl)
+"""
+    sparse(I, J, V, [m, n, combine])
+
+Create a sparse matrix `S` of dimensions `m` x `n` such that `S[I[k], J[k]] = V[k]`. The
+`combine` function is used to combine duplicates. If `m` and `n` are not specified, they
+are set to `maximum(I)` and `maximum(J)` respectively. If the `combine` function is not
+supplied, duplicates are added by default. All elements of `I` must satisfy
+`1 <= I[k] <= m`, and all elements of `J` must satisfy `1 <= J[k] <= n`.
+
+For additional documentation and an expert driver, see `Base.SparseArrays.sparse!`.
+"""
+function sparse{Tv,Ti<:Integer}(I::AbstractVector{Ti}, J::AbstractVector{Ti}, V::AbstractVector{Tv}, m::Integer, n::Integer, combine)
+    coolen = length(I)
+    if length(J) != coolen || length(V) != coolen
+        throw(ArgumentError(string("the first three arguments' lengths must match, ",
+              "length(I) (=$(length(I))) == length(J) (= $(length(J))) == length(V) (= ",
+              "$(length(V)))")))
+    end
+
+    if m == 0 || n == 0 || coolen == 0
+        if coolen != 0
+            if n == 0
+                throw(ArgumentError("column indices J[k] must satisfy 1 <= J[k] <= n"))
+            elseif m == 0
+                throw(ArgumentError("row indices I[k] must satisfy 1 <= I[k] <= m"))
+            end
+        end
+        SparseMatrixCSC(m, n, ones(Ti, n+1), Vector{Ti}(), Vector{Tv}())
+    else
+        # Allocate storage for CSR form
+        csrrowptr = Vector{Ti}(m+1)
+        csrcolval = Vector{Ti}(coolen)
+        csrnzval = Vector{Tv}(coolen)
+
+        # Allocate storage for the CSC form's column pointers and a necessary workspace
+        csccolptr = Vector{Ti}(n+1)
+        klasttouch = Vector{Ti}(n)
+
+        # Allocate empty arrays for the CSC form's row and nonzero value arrays
+        # The parent method called below automagically resizes these arrays
+        cscrowval = Vector{Ti}()
+        cscnzval = Vector{Tv}()
+
+        sparse!(I, J, V, m, n, combine, klasttouch,
+                csrrowptr, csrcolval, csrnzval,
+                csccolptr, cscrowval, cscnzval )
+    end
+end
+
+"""
+    sparse!{Tv,Ti<:Integer}(
+        I::AbstractVector{Ti}, J::AbstractVector{Ti}, V::AbstractVector{Tv},
+        m::Integer, n::Integer, combine, klasttouch::Vector{Ti},
+        csrrowptr::Vector{Ti}, csrcolval::Vector{Ti}, csrnzval::Vector{Tv},
+        [csccolptr::Vector{Ti}], [cscrowval::Vector{Ti}, cscnzval::Vector{Tv}] )
+
+Parent of and expert driver for `sparse`; see `sparse` for basic usage. This method
+allows the user to provide preallocated storage for `sparse`'s intermediate objects and
+result as described below. This capability enables more efficient successive construction
+of `SparseMatrixCSC`s from coordinate representations, and also enables extraction of an
+unsorted-column representation of the result's transpose at no additional cost.
+
+This method consists of three major steps: (1) Counting-sort the provided coordinate
+representation into an unsorted-row CSR form including repeated entries. (2) Sweep through
+the CSR form, simultaneously calculating the desired CSC form's column-pointer array,
+detecting repeated entries, and repacking the CSR form with repeated entries combined;
+this stage yields an unsorted-row CSR form with no repeated entries. (3) Counting-sort the
+preceding CSR form into a fully-sorted CSC form with no repeated entries.
+
+Input arrays `csrrowptr`, `csrcolval`, and `csrnzval` constitute storage for the
+intermediate CSR forms and require `length(csrrowptr) >= m + 1`,
+`length(csrcolval) >= length(I)`, and `length(csrnzval >= length(I))`. Input
+array `klasttouch`, workspace for the second stage, requires `length(klasttouch) >= n`.
+Optional input arrays `csccolptr`, `cscrowval`, and `cscnzval` constitute storage for the
+returned CSC form `S`. `csccolptr` requires `length(csccolptr) >= n + 1`. If necessary,
+`cscrowval` and `cscnzval` are automatically resized to satisfy
+`length(cscrowval) >= nnz(S)` and `length(cscnzval) >= nnz(S)`; hence, if `nnz(S)` is
+unknown at the outset, passing in empty vectors of the appropriate type (`Vector{Ti}()`
+and `Vector{Tv}()` respectively) suffices, or calling the `sparse!` method
+neglecting `cscrowval` and `cscnzval`.
+
+On return, `csrrowptr`, `csrcolval`, and `csrnzval` contain an unsorted-column
+representation of the result's transpose.
+
+For the sake of efficiency, this method performs no argument checking beyond
+`1 <= I[k] <= m` and `1 <= J[k] <= n`. Use with care. Testing with `--check-bounds=yes`
+is wise.
+
+This method runs in `O(m, n, length(I))` time. The HALFPERM algorithm described in
+F. Gustavson, "Two fast algorithms for sparse matrices: multiplication and permuted
+transposition," ACM TOMS 4(3), 250-269 (1978) inspired this method's use of a pair of
+counting sorts.
+
+Performance note: As of January 2016, `combine` should be a functor for this method to
+perform well. This caveat may disappear when the work in `jb/functions` lands.
+"""
+function sparse!{Tv,Ti<:Integer}(I::AbstractVector{Ti}, J::AbstractVector{Ti},
+        V::AbstractVector{Tv}, m::Integer, n::Integer, combine, klasttouch::Vector{Ti},
+        csrrowptr::Vector{Ti}, csrcolval::Vector{Ti}, csrnzval::Vector{Tv},
+        csccolptr::Vector{Ti}, cscrowval::Vector{Ti}, cscnzval::Vector{Tv} )
+
+    # Compute the CSR form's row counts and store them shifted forward by one in csrrowptr
+    fill!(csrrowptr, 0)
+    coolen = length(I)
+    @inbounds for k in 1:coolen
+        Ik = I[k]
+        if 1 > Ik || m < Ik
+            throw(ArgumentError("row indices I[k] must satisfy 1 <= I[k] <= m"))
+        end
+        csrrowptr[Ik+1] += 1
+    end
+
+    # Compute the CSR form's rowptrs and store them shifted forward by one in csrrowptr
+    countsum = 1
+    csrrowptr[1] = 1
+    @inbounds for i in 2:(m+1)
+        overwritten = csrrowptr[i]
+        csrrowptr[i] = countsum
+        countsum += overwritten
+    end
+
+    # Counting-sort the column and nonzero values from J and V into csrcolval and csrnzval
+    # Tracking write positions in csrrowptr corrects the row pointers
+    @inbounds for k in 1:coolen
+        Ik, Jk = I[k], J[k]
+        if 1 > Jk || n < Jk
+            throw(ArgumentError("column indices J[k] must satisfy 1 <= J[k] <= n"))
+        end
+        csrk = csrrowptr[Ik+1]
+        csrrowptr[Ik+1] = csrk+1
+        csrcolval[csrk] = Jk
+        csrnzval[csrk] = V[k]
+    end
+    # This completes the unsorted-row, has-repeats CSR form's construction
+
+    # Sweep through the CSR form, simultaneously (1) caculating the CSC form's column
+    # counts and storing them shifted forward by one in csccolptr; (2) detecting repeated
+    # entries; and (3) repacking the CSR form with the repeated entries combined.
+    #
+    # Minimizing extraneous communication and nonlocality of reference, primarily by using
+    # only a single auxiliary array in this step, is the key to this method's performance.
+    fill!(csccolptr, 0)
+    fill!(klasttouch, 0)
+    writek = 1
+    newcsrrowptri = 1
+    origcsrrowptri = 1
+    origcsrrowptrip1 = csrrowptr[2]
+    @inbounds for i in 1:m
+        for readk in origcsrrowptri:(origcsrrowptrip1-1)
+            j = csrcolval[readk]
+            if klasttouch[j] < newcsrrowptri
+                klasttouch[j] = writek
+                if writek != readk
+                    csrcolval[writek] = j
+                    csrnzval[writek] = csrnzval[readk]
+                end
+                writek += 1
+                csccolptr[j+1] += 1
+            else
+                klt = klasttouch[j]
+                csrnzval[klt] = combine(csrnzval[klt], csrnzval[readk])
+            end
+        end
+        newcsrrowptri = writek
+        origcsrrowptri = origcsrrowptrip1
+        origcsrrowptrip1 != writek && (csrrowptr[i+1] = writek)
+        i < m && (origcsrrowptrip1 = csrrowptr[i+2])
+    end
+
+    # Compute the CSC form's colptrs and store them shifted forward by one in csccolptr
+    countsum = 1
+    csccolptr[1] = 1
+    @inbounds for j in 2:(n+1)
+        overwritten = csccolptr[j]
+        csccolptr[j] = countsum
+        countsum += overwritten
+    end
+
+    # Now knowing the CSC form's entry count, resize cscrowval and cscnzval if necessary
+    cscnnz = countsum - 1
+    length(cscrowval) < cscnnz && resize!(cscrowval, cscnnz)
+    length(cscnzval) < cscnnz && resize!(cscnzval, cscnnz)
+
+    # Finally counting-sort the row and nonzero values from the CSR form into cscrowval and
+    # cscnzval. Tracking write positions in csccolptr corrects the column pointers.
+    @inbounds for i in 1:m
+        for csrk in csrrowptr[i]:(csrrowptr[i+1]-1)
+            j = csrcolval[csrk]
+            x = csrnzval[csrk]
+            csck = csccolptr[j+1]
+            csccolptr[j+1] = csck+1
+            cscrowval[csck] = i
+            cscnzval[csck] = x
+        end
+    end
+
+    SparseMatrixCSC(m, n, csccolptr, cscrowval, cscnzval)
+end
+function sparse!{Tv,Ti<:Integer}(I::AbstractVector{Ti}, J::AbstractVector{Ti},
+        V::AbstractVector{Tv}, m::Integer, n::Integer, combine, klasttouch::Vector{Ti},
+        csrrowptr::Vector{Ti}, csrcolval::Vector{Ti}, csrnzval::Vector{Tv},
+        csccolptr::Vector{Ti} )
+    sparse!(I, J, V, m, n, combine, klasttouch,
+            csrrowptr, csrcolval, csrnzval,
+            csccolptr, Vector{Ti}(), Vector{Tv}() )
+end
+function sparse!{Tv,Ti<:Integer}(I::AbstractVector{Ti}, J::AbstractVector{Ti},
+        V::AbstractVector{Tv}, m::Integer, n::Integer, combine, klasttouch::Vector{Ti},
+        csrrowptr::Vector{Ti}, csrcolval::Vector{Ti}, csrnzval::Vector{Tv} )
+    sparse!(I, J, V, m, n, combine, klasttouch,
+            csrrowptr, csrcolval, csrnzval,
+            Vector{Ti}(n+1), Vector{Ti}(), Vector{Tv}() )
+end
 
 dimlub(I) = isempty(I) ? 0 : Int(maximum(I)) #least upper bound on required sparse matrix dimension
 
diff --git a/test/sparsedir/sparse.jl b/test/sparsedir/sparse.jl
index 4b92a06cf9247b..014e96607c5c35 100644
--- a/test/sparsedir/sparse.jl
+++ b/test/sparsedir/sparse.jl
@@ -8,6 +8,13 @@ using Base.Test
 
 # check sparse matrix construction
 @test isequal(full(sparse(complex(ones(5,5),ones(5,5)))), complex(ones(5,5),ones(5,5)))
+@test_throws ArgumentError sparse([1,2,3], [1,2], [1,2,3], 3, 3)
+@test_throws ArgumentError sparse([1,2,3], [1,2,3], [1,2], 3, 3)
+@test_throws ArgumentError sparse([1,2,3], [1,2,3], [1,2,3], 0, 1)
+@test_throws ArgumentError sparse([1,2,3], [1,2,3], [1,2,3], 1, 0)
+@test_throws ArgumentError sparse([1,2,4], [1,2,3], [1,2,3], 3, 3)
+@test_throws ArgumentError sparse([1,2,3], [1,2,4], [1,2,3], 3, 3)
+@test isequal(sparse(Int[], Int[], Int[], 0, 0), SparseMatrixCSC(0, 0, Int[1], Int[], Int[]))
 
 # check matrix operations
 se33 = speye(3)
@@ -303,7 +310,8 @@ mfe22 = eye(Float64, 2)
 @test_throws ArgumentError sparsevec([3,5,7],[0.1,0.0,3.2],4)
 
 # issue #5169
-@test nnz(sparse([1,1],[1,2],[0.0,-0.0])) == 0
+# @test nnz(sparse([1,1],[1,2],[0.0,-0.0])) == 0
+# commented following change to sparse() in #14798, also see #12605, #9928, #9906, #6769
 
 # issue #5386
 K,J,V = findnz(SparseMatrixCSC(2,1,[1,3],[1,2],[1.0,0.0]))
@@ -314,7 +322,8 @@ A = speye(Bool, 5)
 @test find(A) == find(x -> x == true, A) == find(full(A))
 
 # issue #5437
-@test nnz(sparse([1,2,3],[1,2,3],[0.0,1.0,2.0])) == 2
+# @test nnz(sparse([1,2,3],[1,2,3],[0.0,1.0,2.0])) == 2
+# commented following change to sparse() in #14798, also see #12605, #9928, #9906, #6769
 
 # issue #5824
 @test sprand(4,5,0.5).^0 == sparse(ones(4,5))