diff --git a/src/nnlib.jl b/src/nnlib.jl index ef3e85b..bd92079 100644 --- a/src/nnlib.jl +++ b/src/nnlib.jl @@ -2,18 +2,31 @@ using NNlib using NNlib: expand using NNlib: PoolDims -import NNlib: conv +import NNlib: conv, depthwiseconv -function NNlib.conv(x::Tensor{xT, N}, w::Tensor, b::Tensor{T}, cdims::DenseConvDims{M,K,C_in,C_out,S,P,D,F}; - stride = 1, pad = 0, dilation = 1) where {T,N, xT, M,K,C_in,C_out,S,P,D,F} - - op = conv2d(x, w, b, stride = collect(S), padding = [P[1];P[3]], dilation = collect(dilation)) +function NNlib.conv(x::Tensor{xT, N}, w::Tensor, b::Tensor{T}, + cdims::DenseConvDims{M,K,C_in,C_out,S,P,D,F}) where {T,N,xT,M,K,C_in,C_out,S,P,D,F} + op = conv2d(x, w, b, stride = collect(S), padding = [P[1];P[3]], dilation = collect(D)) op end -function NNlib.conv(x::Tensor, w::Tensor, cdims::DenseConvDims; stride = 1, pad = 0, dilation = 1) +function NNlib.conv(x::Tensor, w::Tensor, cdims::DenseConvDims) b = zeros(Tensor{Float32}, size(w)[end], dev = on(w)) - op = conv(x, w, b, cdims, stride = stride, pad = pad, dilation = dilation) + op = conv(x, w, b, cdims) + op +end + +function NNlib.depthwiseconv(x::Tensor{xT, N}, w::Tensor, b::Tensor{T}; + stride = 1, pad = 0, dilation = 1) where {T, N, xT} + op = _depthwise_conv2d(x, w, b, stride = collect(stride), padding = collect(pad), + dilation = collect(dilation)) + op +end + +function NNlib.depthwiseconv(x::Tensor, w::Tensor; stride = 1, pad = 0, dilation = 1) + b = zeros(Tensor{Float32}, size(w)[end], dev = on(w)) + op = depthwiseconv(x, w, b, stride = collect(stride), pad = collect(pad), + dilation = collect(dilation)) op end @@ -38,8 +51,15 @@ function NNlib.sigmoid(t::Tensor{T,N}) where {T,N} Tensor{T,N}(ptr[], on(t)) end +function NNlib.tanh(t::Tensor{T,N}) where {T,N} + ptr = Ref(Ptr{Cvoid}()) + + atg_tanh(ptr, t.ptr) + Tensor{T,N}(ptr[], on(t)) +end + function NNlib.softmax(t::Tensor{T,N}; dims = 1) where {T,N} - _softmax(t, N - dims, options[T]) + _softmax(t, dims, options[T]) end function NNlib.∇softmax(Δ, xs::Tensor; dims = 1) @@ -48,15 +68,20 @@ function NNlib.∇softmax(Δ, xs::Tensor; dims = 1) sf .* (t .- sum(t .* sf, dims = dims)) end -function NNlib.meanpool(t::Tensor, pdims::PoolDims{N,K,S,P,D}; kw...) where {N,K,S,P,D} +function NNlib.meanpool(t::Tensor, pdims::PoolDims{N,K,S,P,D}) where {N,K,S,P,D} ks = collect(NNlib.kernel_size(pdims)) stride = collect(S) - pad = [P[1];P[3]] - op_sz = NNlib.output_size(pdims) + padding = [P[1];P[3]] + # op_sz = NNlib.output_size(pdims) - _meanpool(t, ks, stride, pad, op_sz) + _meanpool(t, ks, stride=stride, padding=padding) end function NNlib.maxpool(t::Tensor, pdims::PoolDims{N,K,S,P,D}) where {N,K,S,P,D} - _maxpool(t, pdims) + ks = collect(NNlib.kernel_size(pdims)) + stride = collect(S) + padding = [P[1];P[3]] + dilation = collect(D) + + _maxpool(t, ks, stride=stride, padding=padding, dilation=dilation) end diff --git a/src/ops.jl b/src/ops.jl index 855b00a..09f71b4 100644 --- a/src/ops.jl +++ b/src/ops.jl @@ -82,39 +82,102 @@ end # TODO: Use a macro to generate wrappers function conv2d(input::Tensor{T}, filter::Tensor{T,N}, bias::Tensor{T}; - stride = [1], - padding = [0], - dilation = [1], - groups = 1) where {T,N} + stride = [1], + padding = [0], + dilation = [1], + groups = 1) where {T,N} ptr = Ref(Ptr{Cvoid}()) atg_conv2d(ptr, input.ptr, filter.ptr, bias.ptr, - stride, length(stride), - padding, length(padding), - dilation, length(dilation), - groups) + reverse(stride), length(stride), + reverse(padding), length(padding), + reverse(dilation), length(dilation), + groups) Tensor{T,N}(ptr[], on(input)) end +function conv_transpose_2d(input::Tensor{T}, filter::Tensor{T,N}, bias::Tensor{T}; + stride = [1], + padding = [0], + output_padding = [0], + dilation = [1], + groups = 1) where {T,N} + + ptr = Ref(Ptr{Cvoid}()) + + atg_conv_transpose2d(ptr, input.ptr, filter.ptr, bias.ptr, + reverse(stride), length(stride), + reverse(padding), length(padding), + reverse(output_padding), length(output_padding), + groups, + reverse(dilation), length(dilation)) + + Tensor{T,N}(ptr[], on(input)) +end + +function _depthwise_conv2d(input::Tensor{T}, filter::Tensor{T,N}, bias::Tensor{T}; + stride = [1], + padding = [0], + dilation = [1]) where {T,N} + + # When groups == in_channels and out_channels == K * in_channels, where K is a positive integer, + # this operation is also termed in literature as depthwise convolution. + + c_in = size(input)[end - 1] # number of input channels + c_out = size(filter)[end] # number of output channels + @assert mod(c_in, c_out) == 0 "Invalid kernel size for depthwise convolution" + + groups = c_in + ptr = Ref(Ptr{Cvoid}()) + + atg_conv2d(ptr, input.ptr, filter.ptr, bias.ptr, + reverse(stride), length(stride), + reverse(padding), length(padding), + reverse(dilation), length(dilation), + groups) + + Tensor{T,N}(ptr[], on(input)) +end + function _softmax(input::Tensor{T,N}, dims = 1, dtype = options[T]) where {T,N} ptr = Ref(Ptr{Cvoid}()) - atg_softmax(ptr, input.ptr, N - dims - 1, dtype) + atg_softmax(ptr, input.ptr, N - dims, dtype) Tensor{T,N}(ptr[], on(input)) end -function _meanpool(t::Tensor{T,N}, k, s, p, op_sz) where {T,N} +function _meanpool(t::Tensor{T,N}, kernel_size; stride = [1] , padding = [0]) where {T,N} + k = collect(kernel_size) + s = collect(stride) + p = collect(padding) ptr = Ref(Ptr{Cvoid}()) atg_avg_pool2d(ptr, t.ptr, - k, length(k), - s, length(s), - p, length(p), - 0, # ceil_mode - 1, # count_include_pad - 1 # divisor_override + reverse(k), length(k), + reverse(s), length(s), + reverse(p), length(p), + 0, # ceil_mode + 1, # count_include_pad + prod(k) # divisor_override + ) + Tensor{T,N}(ptr[], on(t)) +end + +function _maxpool(t::Tensor{T,N}, kernel_size; stride = [1], padding = [0], dilation = [1]) where {T,N} + k = collect(kernel_size) + s = collect(stride) + p = collect(padding) + d = collect(dilation) + ptr = Ref(Ptr{Cvoid}()) + + atg_max_pool2d(ptr, t.ptr, + reverse(k), length(k), + reverse(s), length(s), + reverse(p), length(p), + reverse(d), length(d), + 0, # ceil_mode ) Tensor{T,N}(ptr[], on(t)) end @@ -129,10 +192,10 @@ function _maxpool(t::Tensor{T,M}, pdims::PoolDims{N,K,S,P,D}; ptr = Ref(Ptr{Cvoid}()) atg_max_pool2d(ptr, t.ptr, - k, length(k), - s, length(s), - p, length(p), - d, length(d), + reverse(k), length(k), + reverse(s), length(s), + reverse(p), length(p), + reverse(d), length(d), ceil_mode, # ceil_mode ) @@ -149,16 +212,67 @@ function _maxpool_with_inds(t::Tensor{T,M}, pdims::PoolDims{N,K,S,P,D}; ptr = [Ptr{Cvoid}(), Ptr{Cvoid}()] atg_max_pool2d_with_indices(ptr, t.ptr, - k, length(k), - s, length(s), - p, length(p), - d, length(d), - ceil_mode, # ceil_mode + reverse(k), length(k), + reverse(s), length(s), + reverse(p), length(p), + reverse(d), length(d), + ceil_mode, ) Tensor{T,M}(ptr[1], on(t)), Tensor{T,M}(ptr[2], on(t)) end +function _upsample_nearest2d(t::Tensor{T,N}, output_size) where {T,N} + ptr = Ref(Ptr{Cvoid}()) + + atg_upsample_nearest2d(ptr, t.ptr, + reverse(output_size), length(output_size), + ) + Tensor{T,N}(ptr[], on(t)) +end + +function _upsample_bilinear2d(t::Tensor{T,N}, output_size, align_corners = true) where {T,N} + ptr = Ref(Ptr{Cvoid}()) + + atg_upsample_bilinear2d(ptr, t.ptr, + reverse(output_size), length(output_size), + align_corners, + ) + Tensor{T,N}(ptr[], on(t)) +end + +function _upsample_bicubic2d(t::Tensor{T,N}, output_size, align_corners = true) where {T,N} + ptr = Ref(Ptr{Cvoid}()) + + atg_upsample_bicubic2d(ptr, t.ptr, + reverse(output_size), length(output_size), + align_corners, + ) + Tensor{T,N}(ptr[], on(t)) +end + +function upsample(t::Tensor{T,N}, output_size, mode) where {T,N} + if mode == :NEAREST + _upsample_nearest2d(t, output_size) + elseif mode == :LINEAR + _upsample_bilinear2d(t, output_size) + elseif mode == :CUBIC + _upsample_bicubic2d(t, output_size) + else + error("Unsupported mode $(mode).") + end +end + +function pad(t::Tensor{T,N}, padding) where {T,N} + ptr = Ref(Ptr{Cvoid}()) + p = collect(padding) + + atg_constant_pad_nd(ptr, t.ptr, + p, length(p), + ) + Tensor{T,N}(ptr[], on(t)) +end + function _chunk(t::Tensor{T,N}, chunks=2, dims=1) where {T,N} ts = [Ptr{Cvoid}() for _ in 1:chunks] atg_chunk(ts, t.ptr, chunks, N - dims) diff --git a/test/test_nnlib.jl b/test/test_nnlib.jl new file mode 100644 index 0000000..db9eeff --- /dev/null +++ b/test/test_nnlib.jl @@ -0,0 +1,203 @@ +using Test +using NNlib +using Torch: tensor + + +@testset "DepthwiseConv" begin + for kernel_width in [1, 3, 5], + kernel_height in [1, 2, 4], + in_channels in [1, 2], + out_channels in [1, 2] + + kernel = rand(-9.0f0:9.0f0, kernel_height, kernel_width, 1, in_channels) + + for height in [5, 6], + width in [5, 7] + + test_input = rand(-9.0f0:9.0f0, height, width, in_channels, 1) + x = tensor(test_input, dev = 0) + w = tensor(kernel, dev = 0) + + expected_output = NNlib.depthwiseconv(test_input, kernel, pad = (0,0), stride = (1,1 ), dilation = (1, 1), flipped = true) + test_output = NNlib.depthwiseconv(x, w, pad = (0,0), stride = (1,1 ), dilation = (1, 1)) + + test_output = Array(test_output) + @test maximum(abs.(test_output - expected_output)) < 10 * eps(Float32) + end + end +end + + +@testset "Conv with padding" begin + for kernel_width in [1, 2, 3, 5], + kernel_height in [1, 2, 3, 5], + in_channels in [1, 2], + out_channels in [1, 2] + + num_coefficients = (kernel_width * kernel_height * in_channels * out_channels) + kernel = reshape(1.0f0:num_coefficients, kernel_height, kernel_width, in_channels, out_channels) + kernel = collect(kernel) + pad = size(kernel)[1:2] .÷ 2 + + for height in [1, 2, 3, 4], + width in [1, 2, 3, 5] + + test_input = zeros(Float32, height, width, in_channels, 1) + test_input[(height + 1) ÷ 2, (width + 1) ÷ 2, 1, 1] = 1 + x = tensor(test_input, dev = 0) + w = tensor(kernel, dev = 0) + + cdims = NNlib.DenseConvDims(size(test_input), + size(kernel), + stride=(1, 1), + padding=pad, + dilation=(1, 1), + flipkernel = true) + + expected_output = NNlib.conv(test_input, kernel, cdims) + test_output = NNlib.conv(x, w, cdims) + + test_output = Array(test_output) + @test maximum(abs.(test_output - expected_output)) < 10 * eps(Float32) + end + end +end + + +@testset "Conv with stride" begin + for kernel_width in [1, 3, 4], + kernel_height in [1, 2, 5], + in_channels in [1], + out_channels in [1], + row_stride in [1, 2, 4], + column_stride in [1, 3, 5] + + kernel = fill(1.0f0, kernel_height, kernel_width, in_channels, out_channels) + kernel = collect(kernel) + + for height in 13:(13 + row_stride - 1), + width in 15:(15 + column_stride - 1) + + sz_in = [height, width, in_channels, 1] + test_input = reshape(1.0f0:prod(sz_in), height, width, in_channels, 1) + test_input = collect(test_input) + x = tensor(test_input, dev = 0) + w = tensor(kernel, dev = 0) + + cdims = NNlib.DenseConvDims(size(test_input), + size(kernel), + stride=(row_stride, column_stride), + padding=(0, 0), + dilation=(1, 1), + flipkernel = true) + + expected_output = NNlib.conv(test_input, kernel, cdims) + test_output = NNlib.conv(x, w, cdims) + + test_output = Array(test_output) + @test maximum(abs.(test_output - expected_output)) < 10 * eps(Float32) + end + end +end + + +@testset "Conv with dilation" begin + for kernel_width in 1, + kernel_height in 1:9, + in_channels in 1, + out_channels in 1, + row_stride in 1:11, + column_stride in 1, + row_rate in 1:4, + column_rate in 1 + + if kernel_height * row_rate > 13 + continue + end + + kernel = fill(1.0f0, kernel_height, kernel_width, in_channels, out_channels) + kernel = collect(kernel) + + for height in 13:(13 + row_stride - 1), + width in [1] + + sz_in = [height, width, in_channels, 1] + test_input = reshape(1.0f0:prod(sz_in), height, width, in_channels, 1) + test_input = collect(test_input) + x = tensor(test_input, dev = 0) + w = tensor(kernel, dev = 0) + + cdims = NNlib.DenseConvDims(size(test_input), + size(kernel), + stride=(row_stride, column_stride), + padding=(0, 0), + dilation=(1, 1), + flipkernel = true) + + expected_output = NNlib.conv(test_input, kernel, cdims) + test_output = NNlib.conv(x, w, cdims) + + test_output = Array(test_output) + @test maximum(abs.(test_output - expected_output)) < 10 * eps(Float32) + end + end +end + + +@testset "Pooling" begin + for fn in (NNlib.maxpool, NNlib.meanpool), + column_span in 1:3, + row_span in 1:3, + column_stride in 1:3, + row_stride in 1:3, + pad in (false, true) + + if pad + padding = (row_span, column_span) .÷ 2 + else + padding = (0, 0) + end + + for height in (1:2) * row_span * row_stride, + width in (1:2) * column_span * column_stride, + channels in 1:2 + + test_input = rand(0.0f0:9.0f0, height, width, channels, 1) + x = tensor(test_input, dev = 0) + + pdims = NNlib.PoolDims(size(test_input), + (row_span, column_span), + padding=padding, + stride=(row_stride, column_stride)) + + expected_output = fn(test_input, pdims) + test_output = fn(x, pdims) + + test_output = Array(test_output) + @test maximum(abs.(test_output - expected_output)) < 10 * eps(Float32) + end + end +end + + +@testset "Activations" begin + for fn in (NNlib.relu, NNlib.tanh, NNlib.sigmoid, NNlib.leakyrelu, NNlib.softmax), + height in [1, 2, 3, 4, 7], + width in [1, 2, 3, 5, 6], + channels in 1:3 + + test_input = rand(-9.0f0:9.0f0, height, width, channels, 1) + x = tensor(test_input, dev = 0) + + if fn == NNlib.softmax + expected_output = fn(test_input, dims = 3) + test_output = fn(x, dims = 3) + else + expected_output = fn.(test_input) + test_output = fn(x) + end + + test_output = Array(test_output) + @test maximum(abs.(test_output - expected_output)) < 10 * eps(Float32) + end +end