Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: cache oblivious linear algebra algorithms #6690

Merged
merged 1 commit into from
May 26, 2014
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 70 additions & 32 deletions base/array.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1238,56 +1238,94 @@ function filter(f::Function, a::Vector)
end

## Transpose ##
const transposebaselength=64
function transpose!(B::StridedMatrix,A::StridedMatrix)
m, n = size(A)
size(B) == (n,m) || throw(DimensionMismatch("transpose"))

const sqrthalfcache = 1<<7
function transpose!{T<:Number}(B::Matrix{T}, A::Matrix{T})
if m*n<=4*transposebaselength
@inbounds begin
for j = 1:n
for i = 1:m
B[j,i] = A[i,j]
end
end
end
else
transposeblock!(B,A,m,n,0,0)
end
return B
end
function transposeblock!(B::StridedMatrix,A::StridedMatrix,m::Int,n::Int,offseti::Int,offsetj::Int)
if m*n<=transposebaselength
@inbounds begin
for j = offsetj+(1:n)
for i = offseti+(1:m)
B[j,i] = A[i,j]
end
end
end
elseif m>n
newm=m>>1
transposeblock!(B,A,newm,n,offseti,offsetj)
transposeblock!(B,A,m-newm,n,offseti+newm,offsetj)
else
newn=n>>1
transposeblock!(B,A,m,newn,offseti,offsetj)
transposeblock!(B,A,m,n-newn,offseti,offsetj+newn)
end
return B
end
function ctranspose!(B::StridedMatrix,A::StridedMatrix)
m, n = size(A)
if size(B) != (n,m)
error("input and output must have same size")
end
elsz = isbits(T) ? sizeof(T) : sizeof(Ptr)
blocksize = ifloor(sqrthalfcache/elsz/1.4) # /1.4 to avoid complete fill of cache
if m*n <= 4*blocksize*blocksize
# For small sizes, use a simple linear-indexing algorithm
for i2 = 1:n
j = i2
offset = (j-1)*m
for i = offset+1:offset+m
B[j] = A[i]
j += n
size(B) == (n,m) || throw(DimensionMismatch("transpose"))

if m*n<=4*transposebaselength
@inbounds begin
for j = 1:n
for i = 1:m
B[j,i] = conj(A[i,j])
end
end
end
return B
else
ctransposeblock!(B,A,m,n,0,0)
end
# For larger sizes, use a cache-friendly algorithm
for outer2 = 1:blocksize:size(A, 2)
for outer1 = 1:blocksize:size(A, 1)
for inner2 = outer2:min(n,outer2+blocksize)
i = (inner2-1)*m + outer1
j = inner2 + (outer1-1)*n
for inner1 = outer1:min(m,outer1+blocksize)
B[j] = A[i]
i += 1
j += n
return B
end
function ctransposeblock!(B::StridedMatrix,A::StridedMatrix,m::Int,n::Int,offseti::Int,offsetj::Int)
if m*n<=transposebaselength
@inbounds begin
for j = offsetj+(1:n)
for i = offseti+(1:m)
B[j,i] = conj(A[i,j])
end
end
end
elseif m>n
newm=m>>1
ctransposeblock!(B,A,newm,n,offseti,offsetj)
ctransposeblock!(B,A,m-newm,n,offseti+newm,offsetj)
else
newn=n>>1
ctransposeblock!(B,A,m,newn,offseti,offsetj)
ctransposeblock!(B,A,m,n-newn,offseti,offsetj+newn)
end
B
return B
end

function transpose{T<:Number}(A::Matrix{T})
function transpose(A::StridedMatrix)
B = similar(A, size(A, 2), size(A, 1))
transpose!(B, A)
end

function ctranspose{T<:Complex}(A::StridedMatrix{T})
B = similar(A, size(A, 2), size(A, 1))
ctranspose!(B, A)
end
ctranspose{T<:Real}(A::StridedVecOrMat{T}) = transpose(A)

transpose(x::StridedVector) = [ x[j] for i=1, j=1:size(x,1) ]
transpose(x::StridedMatrix) = [ x[j,i] for i=1:size(x,2), j=1:size(x,1) ]

ctranspose{T}(x::StridedVector{T}) = T[ conj(x[j]) for i=1, j=1:size(x,1) ]
ctranspose{T}(x::StridedMatrix{T}) = T[ conj(x[j,i]) for i=1:size(x,2), j=1:size(x,1) ]

# set-like operators for vectors
# These are moderately efficient, preserve order, and remove dupes.
Expand Down