Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 38 additions & 13 deletions src/nnlib.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,31 @@ using NNlib
using NNlib: expand
using NNlib: PoolDims

import NNlib: conv
import NNlib: conv, depthwiseconv

function NNlib.conv(x::Tensor{xT, N}, w::Tensor, b::Tensor{T},
                    cdims::DenseConvDims{M,K,C_in,C_out,S,P,D,F}) where {T,N,xT,M,K,C_in,C_out,S,P,D,F}
    # Biased convolution for NNlib's `DenseConvDims` interface.  Stride `S`,
    # dilation `D` and the padding tuple `P` are read from the type
    # parameters of `cdims`; `[P[1]; P[3]]` keeps one padding amount per
    # spatial dimension (NNlib stores before/after pairs).
    return conv2d(x, w, b, stride = collect(S), padding = [P[1]; P[3]], dilation = collect(D))
end

function NNlib.conv(x::Tensor, w::Tensor, cdims::DenseConvDims)
    # Bias-free convolution: allocate a zero bias with one entry per output
    # channel (`size(w)[end]`) on the same device as `w`, then reuse the
    # biased method.
    b = zeros(Tensor{Float32}, size(w)[end], dev = on(w))
    return conv(x, w, b, cdims)
end

function NNlib.depthwiseconv(x::Tensor{xT, N}, w::Tensor, b::Tensor{T};
                             stride = 1, pad = 0, dilation = 1) where {T, N, xT}
    # Depthwise convolution with an explicit bias tensor; forwards to the
    # grouped-conv wrapper in ops.jl.  `collect` normalises scalar/tuple
    # hyper-parameters into the vectors the low-level wrapper expects.
    return _depthwise_conv2d(x, w, b,
                             stride = collect(stride),
                             padding = collect(pad),
                             dilation = collect(dilation))
end

function NNlib.depthwiseconv(x::Tensor, w::Tensor; stride = 1, pad = 0, dilation = 1)
    # Bias-free depthwise convolution: build a zero bias with one entry per
    # output channel (`size(w)[end]`) on the same device as `w`, then reuse
    # the biased method.
    zero_bias = zeros(Tensor{Float32}, size(w)[end], dev = on(w))
    return depthwiseconv(x, w, zero_bias,
                         stride = collect(stride),
                         pad = collect(pad),
                         dilation = collect(dilation))
end

Expand All @@ -38,8 +51,15 @@ function NNlib.sigmoid(t::Tensor{T,N}) where {T,N}
Tensor{T,N}(ptr[], on(t))
end

function NNlib.tanh(t::Tensor{T,N}) where {T,N}
    # Element-wise hyperbolic tangent, delegated to the `atg_tanh` kernel.
    # The output handle is written into `out` by the C call.
    out = Ref(Ptr{Cvoid}())
    atg_tanh(out, t.ptr)
    return Tensor{T,N}(out[], on(t))
end

function NNlib.softmax(t::Tensor{T,N}; dims = 1) where {T,N}
    # Forward the NNlib dimension as-is; `_softmax` performs the
    # Julia-to-Torch dimension translation internally.
    return _softmax(t, dims, options[T])
end

function NNlib.∇softmax(Δ, xs::Tensor; dims = 1)
Expand All @@ -48,15 +68,20 @@ function NNlib.∇softmax(Δ, xs::Tensor; dims = 1)
sf .* (t .- sum(t .* sf, dims = dims))
end

function NNlib.meanpool(t::Tensor, pdims::PoolDims{N,K,S,P,D}) where {N,K,S,P,D}
    # Average pooling for NNlib's `PoolDims` interface.  Kernel size, stride
    # and padding come from the type parameters; `[P[1]; P[3]]` keeps one
    # padding amount per spatial dimension (NNlib stores before/after pairs).
    ks = collect(NNlib.kernel_size(pdims))
    stride = collect(S)
    padding = [P[1]; P[3]]
    return _meanpool(t, ks, stride = stride, padding = padding)
end

function NNlib.maxpool(t::Tensor, pdims::PoolDims{N,K,S,P,D}) where {N,K,S,P,D}
    # Max pooling for NNlib's `PoolDims` interface.  Kernel size, stride,
    # padding and dilation come from the type parameters; `[P[1]; P[3]]`
    # keeps one padding amount per spatial dimension.
    ks = collect(NNlib.kernel_size(pdims))
    stride = collect(S)
    padding = [P[1]; P[3]]
    dilation = collect(D)
    return _maxpool(t, ks, stride = stride, padding = padding, dilation = dilation)
end
164 changes: 139 additions & 25 deletions src/ops.jl
Original file line number Diff line number Diff line change
Expand Up @@ -82,39 +82,102 @@ end

# TODO: Use a macro to generate wrappers
function conv2d(input::Tensor{T}, filter::Tensor{T,N}, bias::Tensor{T};
                stride = [1],
                padding = [0],
                dilation = [1],
                groups = 1) where {T,N}
    # 2-D convolution via libtorch's `conv2d`.  The spatial hyper-parameter
    # vectors are reversed to translate Julia's column-major dimension order
    # into Torch's row-major order (see the reversing convention used
    # throughout this file).
    ptr = Ref(Ptr{Cvoid}())

    atg_conv2d(ptr, input.ptr, filter.ptr, bias.ptr,
               reverse(stride), length(stride),
               reverse(padding), length(padding),
               reverse(dilation), length(dilation),
               groups)

    Tensor{T,N}(ptr[], on(input))
end

function conv_transpose_2d(input::Tensor{T}, filter::Tensor{T,N}, bias::Tensor{T};
                           stride = [1],
                           padding = [0],
                           output_padding = [0],
                           dilation = [1],
                           groups = 1) where {T,N}
    # 2-D transposed convolution via libtorch's `conv_transpose2d`.  Spatial
    # hyper-parameter vectors are reversed to translate Julia's column-major
    # dimension order into Torch's row-major order.
    out = Ref(Ptr{Cvoid}())
    s = reverse(stride)
    p = reverse(padding)
    op = reverse(output_padding)
    d = reverse(dilation)

    atg_conv_transpose2d(out, input.ptr, filter.ptr, bias.ptr,
                         s, length(s),
                         p, length(p),
                         op, length(op),
                         groups,
                         d, length(d))

    return Tensor{T,N}(out[], on(input))
end

function _depthwise_conv2d(input::Tensor{T}, filter::Tensor{T,N}, bias::Tensor{T};
                           stride = [1],
                           padding = [0],
                           dilation = [1]) where {T,N}
    # When groups == in_channels and out_channels == K * in_channels, where
    # K is a positive integer, this operation is also termed in literature
    # as depthwise convolution.
    c_in = size(input)[end - 1]   # number of input channels
    c_out = size(filter)[end]     # number of output channels

    # Torch's grouped conv requires out_channels to be a multiple of
    # `groups` (== c_in here).  The previous check, `mod(c_in, c_out) == 0`,
    # was inverted and rejected any channel multiplier K > 1; it is also an
    # input-validation check, so raise an ArgumentError rather than @assert.
    mod(c_out, c_in) == 0 ||
        throw(ArgumentError("depthwise convolution requires out_channels " *
                            "($c_out) to be a multiple of in_channels ($c_in)"))

    groups = c_in
    ptr = Ref(Ptr{Cvoid}())

    atg_conv2d(ptr, input.ptr, filter.ptr, bias.ptr,
               reverse(stride), length(stride),
               reverse(padding), length(padding),
               reverse(dilation), length(dilation),
               groups)

    Tensor{T,N}(ptr[], on(input))
end

function _softmax(input::Tensor{T,N}, dims = 1, dtype = options[T]) where {T,N}
    # Torch numbers dimensions from the front of a row-major layout, so the
    # Julia dimension `dims` maps to Torch dimension `N - dims`
    # (0-based from Torch's perspective).
    ptr = Ref(Ptr{Cvoid}())

    atg_softmax(ptr, input.ptr, N - dims, dtype)
    Tensor{T,N}(ptr[], on(input))
end

function _meanpool(t::Tensor{T,N}, kernel_size; stride = [1], padding = [0]) where {T,N}
    # Average pooling via libtorch's `avg_pool2d`.  Spatial vectors are
    # reversed to match Torch's row-major dimension ordering.
    # `divisor_override = prod(k)` divides every window by the full kernel
    # area, consistent with `count_include_pad = 1`.
    k = collect(kernel_size)
    s = collect(stride)
    p = collect(padding)
    ptr = Ref(Ptr{Cvoid}())

    atg_avg_pool2d(ptr, t.ptr,
                   reverse(k), length(k),
                   reverse(s), length(s),
                   reverse(p), length(p),
                   0,        # ceil_mode
                   1,        # count_include_pad
                   prod(k))  # divisor_override
    Tensor{T,N}(ptr[], on(t))
end

function _maxpool(t::Tensor{T,N}, kernel_size; stride = [1], padding = [0], dilation = [1]) where {T,N}
    # Max pooling via libtorch's `max_pool2d`.  Spatial vectors are reversed
    # to match Torch's row-major dimension ordering.  This method takes
    # plain kernel/stride vectors so that NNlib's PoolDims stays confined to
    # nnlib.jl, mirroring `_meanpool`'s signature.
    k = collect(kernel_size)
    s = collect(stride)
    p = collect(padding)
    d = collect(dilation)
    ptr = Ref(Ptr{Cvoid}())

    atg_max_pool2d(ptr, t.ptr,
                   reverse(k), length(k),
                   reverse(s), length(s),
                   reverse(p), length(p),
                   reverse(d), length(d),
                   0)  # ceil_mode
    Tensor{T,N}(ptr[], on(t))
end
Expand All @@ -129,10 +192,10 @@ function _maxpool(t::Tensor{T,M}, pdims::PoolDims{N,K,S,P,D};
ptr = Ref(Ptr{Cvoid}())

atg_max_pool2d(ptr, t.ptr,
k, length(k),
s, length(s),
p, length(p),
d, length(d),
reverse(k), length(k),
reverse(s), length(s),
reverse(p), length(p),
reverse(d), length(d),
ceil_mode, # ceil_mode
)

Expand All @@ -149,16 +212,67 @@ function _maxpool_with_inds(t::Tensor{T,M}, pdims::PoolDims{N,K,S,P,D};
ptr = [Ptr{Cvoid}(), Ptr{Cvoid}()]

atg_max_pool2d_with_indices(ptr, t.ptr,
k, length(k),
s, length(s),
p, length(p),
d, length(d),
ceil_mode, # ceil_mode
reverse(k), length(k),
reverse(s), length(s),
reverse(p), length(p),
reverse(d), length(d),
ceil_mode,
)

Tensor{T,M}(ptr[1], on(t)), Tensor{T,M}(ptr[2], on(t))
end

function _upsample_nearest2d(t::Tensor{T,N}, output_size) where {T,N}
    # Nearest-neighbour 2-D upsampling.  The target size is reversed to
    # match Torch's row-major spatial ordering.
    out = Ref(Ptr{Cvoid}())
    sz = reverse(output_size)
    atg_upsample_nearest2d(out, t.ptr, sz, length(sz))
    return Tensor{T,N}(out[], on(t))
end

function _upsample_bilinear2d(t::Tensor{T,N}, output_size, align_corners = true) where {T,N}
    # Bilinear 2-D upsampling.  The target size is reversed to match
    # Torch's row-major spatial ordering.
    out = Ref(Ptr{Cvoid}())
    sz = reverse(output_size)
    atg_upsample_bilinear2d(out, t.ptr, sz, length(sz), align_corners)
    return Tensor{T,N}(out[], on(t))
end

function _upsample_bicubic2d(t::Tensor{T,N}, output_size, align_corners = true) where {T,N}
    # Bicubic 2-D upsampling.  The target size is reversed to match Torch's
    # row-major spatial ordering.
    out = Ref(Ptr{Cvoid}())
    sz = reverse(output_size)
    atg_upsample_bicubic2d(out, t.ptr, sz, length(sz), align_corners)
    return Tensor{T,N}(out[], on(t))
end

function upsample(t::Tensor{T,N}, output_size, mode; align_corners = true) where {T,N}
    # Dispatch on the interpolation-mode symbol (:NEAREST, :LINEAR, :CUBIC).
    # `align_corners` only affects the linear and cubic kernels; its default
    # preserves the previously hard-coded value, so existing callers are
    # unaffected.
    if mode == :NEAREST
        _upsample_nearest2d(t, output_size)
    elseif mode == :LINEAR
        _upsample_bilinear2d(t, output_size, align_corners)
    elseif mode == :CUBIC
        _upsample_bicubic2d(t, output_size, align_corners)
    else
        error("Unsupported mode $(mode).")
    end
end

function pad(t::Tensor{T,N}, padding) where {T,N}
    # Constant padding via `constant_pad_nd`.  `padding` is forwarded to
    # Torch verbatim (no reversal here, unlike the conv/pool wrappers).
    # NOTE(review): confirm callers supply pads in the order Torch expects.
    pads = collect(padding)
    out = Ref(Ptr{Cvoid}())
    atg_constant_pad_nd(out, t.ptr, pads, length(pads))
    return Tensor{T,N}(out[], on(t))
end

function _chunk(t::Tensor{T,N}, chunks=2, dims=1) where {T,N}
ts = [Ptr{Cvoid}() for _ in 1:chunks]
atg_chunk(ts, t.ptr, chunks, N - dims)
Expand Down
Loading