diff --git a/Sources/DeepLearning/Layer.swift b/Sources/DeepLearning/Layer.swift index 2eeb63f01..6469d660a 100644 --- a/Sources/DeepLearning/Layer.swift +++ b/Sources/DeepLearning/Layer.swift @@ -513,6 +513,122 @@ public extension Conv2D { } } +/// A 3-D convolution layer for spatial/spatio-temporal convolution over images. +/// +/// This layer creates a convolution filter that is convolved with the layer input to produce a +/// tensor of outputs. +@_fixed_layout +public struct Conv3D: Layer { + /// The 5-D convolution kernel. + public var filter: Tensor + /// The bias vector, added to the convolution result before the activation is applied. + public var bias: Tensor + /// The type of an activation function: a differentiable function from tensor to tensor. + public typealias Activation = @differentiable (Tensor) -> Tensor + /// The element-wise activation function. + @noDerivative public let activation: Activation + /// The strides of the sliding window for spatial/spatio-temporal dimensions. + @noDerivative public let strides: (Int, Int, Int) + /// The padding algorithm for convolution. + @noDerivative public let padding: Padding + + /// Creates a `Conv3D` layer with the specified filter, bias, activation function, strides, and + /// padding. + /// + /// - Parameters: + /// - filter: The 5-D convolution kernel. + /// - bias: The bias vector. + /// - activation: The element-wise activation function. + /// - strides: The strides of the sliding window for spatial/spatio-temporal dimensions. + /// - padding: The padding algorithm for convolution. + public init( + filter: Tensor, + bias: Tensor, + activation: @escaping Activation, + strides: (Int, Int, Int), + padding: Padding + ) { + self.filter = filter + self.bias = bias + self.activation = activation + self.strides = strides + self.padding = padding + } + + /// Returns the output obtained from applying the layer to the given input. + /// + /// - Parameter input: The input to the layer. + /// - Returns: The result of applying `activation` to the convolved input plus `bias`. 
+ @differentiable + public func call(_ input: Tensor) -> Tensor { + return activation(input.convolved3D(withFilter: filter, + strides: (1, strides.0, strides.1, strides.2, 1), + padding: padding) + bias) + } +} + +public extension Conv3D { + /// Creates a `Conv3D` layer with the specified filter shape, strides, padding, and + /// element-wise activation function. The filter tensor is initialized using Glorot uniform + /// initialization with the specified generator. The bias vector is initialized with zeros. + /// + /// - Parameters: + /// - filterShape: The shape of the 5-D convolution kernel. Its last element is the bias length. + /// - strides: The strides of the sliding window for spatial/spatio-temporal dimensions. + /// - padding: The padding algorithm for convolution. + /// - activation: The element-wise activation function. + /// - generator: The random number generator used to initialize the filter. + /// + /// - Note: Use `init(filterShape:strides:padding:activation:seed:)` for faster random + /// initialization. + init( + filterShape: (Int, Int, Int, Int, Int), + strides: (Int, Int, Int) = (1, 1, 1), + padding: Padding = .valid, + activation: @escaping Activation = identity, + generator: inout G + ) { + let filterTensorShape = TensorShape([ + filterShape.0, filterShape.1, filterShape.2, filterShape.3, filterShape.4]) + self.init( + filter: Tensor(glorotUniform: filterTensorShape, generator: &generator), + bias: Tensor(zeros: TensorShape([filterShape.4])), + activation: activation, + strides: strides, + padding: padding) + } +} + +public extension Conv3D { + /// Creates a `Conv3D` layer with the specified filter shape, strides, padding, and + /// element-wise activation function. The filter tensor is initialized using Glorot uniform + /// initialization with the specified seed. The bias vector is initialized with zeros. + /// + /// - Parameters: + /// - filterShape: The shape of the 5-D convolution kernel. + /// - strides: The strides of the sliding window for spatial/spatio-temporal dimensions. 
+ /// - padding: The padding algorithm for convolution. + /// - activation: The element-wise activation function. + /// - seed: The random seed for initialization. The default value is random. + init( + filterShape: (Int, Int, Int, Int, Int), + strides: (Int, Int, Int) = (1, 1, 1), + padding: Padding = .valid, + activation: @escaping Activation = identity, + seed: (Int64, Int64) = (Int64.random(in: Int64.min.., + filter: Tensor, + strides: (Int, Int, Int, Int, Int), + padding: Padding + ) -> Tensor { + return Raw.conv3DBackpropInputV2( + inputSizes: shape, + filter: filter, + outBackprop: self, + strides: [Int32(strides.0), Int32(strides.1), Int32(strides.2), + Int32(strides.3), Int32(strides.4)], + padding: padding.raw) + } + + /// TensorFlow builtin conv3d gradient helper for the filter (`self` is the output backprop). + @inlinable + @differentiable(wrt: (self, input), vjp: _vjpConv3DBackpropFilter) + internal func conv3DBackpropFilter( + input: Tensor, + filterSizes: Tensor, + strides: (Int, Int, Int, Int, Int), + padding: Padding + ) -> Tensor { + return Raw.conv3DBackpropFilterV2( + input, + filterSizes: filterSizes, + outBackprop: self, + strides: [Int32(strides.0), Int32(strides.1), Int32(strides.2), + Int32(strides.3), Int32(strides.4)], + padding: padding.raw) + } + + @inlinable + internal func _vjpConv3DBackpropInput( + _ shape: Tensor, + _ filter: Tensor, + _ strides: (Int, Int, Int, Int, Int), + _ padding: Padding + ) -> (Tensor, (Tensor) -> (Tensor, Tensor)) { + let value = conv3DBackpropInput(shape: shape, filter: filter, strides: strides, + padding: padding) + return (value, { v in + return ( // Cotangents for `self`, then `filter`. + v.convolved3D(withFilter: filter, strides: strides, padding: padding), + self.conv3DBackpropFilter(input: v, filterSizes: filter.shapeTensor, + strides: strides, padding: padding) + ) + }) + } + + @inlinable + internal func _vjpConv3DBackpropFilter( + _ input: Tensor, + _ filterSizes: Tensor, + _ strides: (Int, Int, Int, Int, Int), + _ padding: Padding + ) -> (Tensor, (Tensor) -> (Tensor, Tensor)) { + let 
value = conv3DBackpropFilter(input: input, filterSizes: filterSizes, + strides: strides, padding: padding) + return (value, { v in + return ( // Cotangents in `wrt` order: (self, input). + input.convolved3D(withFilter: v, strides: strides, padding: padding), + self.conv3DBackpropInput(shape: input.shapeTensor, filter: v, + strides: strides, padding: padding) + ) + }) + } + + @inlinable + internal func _vjpConvolved3D( + filter: Tensor, + strides: (Int, Int, Int, Int, Int), + padding: Padding + ) -> (Tensor, (Tensor) -> (Tensor, Tensor)) { + let value = convolved3D(withFilter: filter, strides: strides, + padding: padding) + return (value, { v in + return ( + v.conv3DBackpropInput( + shape: self.shapeTensor, filter: filter, + strides: strides, padding: padding + ), + v.conv3DBackpropFilter( + input: self, filterSizes: filter.shapeTensor, + strides: strides, padding: padding + ) + ) + }) + } + @inlinable internal func _vjpMaxPooled2D( kernelSize: (Int, Int, Int, Int), @@ -345,6 +439,34 @@ public extension Tensor where Scalar: FloatingPoint { explicitPaddings: []) } + /// Computes a 3-D convolution using `self` as input, with the specified + /// filter, strides, and padding. + /// + /// - Parameters: + /// - filter: The convolution filter. + /// - strides: The strides of the sliding filter for each dimension of the + /// input. + /// - padding: The padding for the operation. + /// - Precondition: `self` must have rank 5. + /// - Precondition: `filter` must have rank 5. + @inlinable @inline(__always) + @differentiable( + wrt: (self, filter), vjp: _vjpConvolved3D + where Scalar: TensorFlowFloatingPoint + ) + func convolved3D( + withFilter filter: Tensor, + strides: (Int, Int, Int, Int, Int), + padding: Padding + ) -> Tensor { + return Raw.conv3D( + self, + filter: filter, + strides: [Int32(strides.0), Int32(strides.1), Int32(strides.2), + Int32(strides.3), Int32(strides.4)], + padding: padding.raw) + } + /// Computes a 2-D max pooling, with the specified kernel sizes, strides, and /// padding. 
/// diff --git a/Tests/DeepLearningTests/LayerTests.swift b/Tests/DeepLearningTests/LayerTests.swift index 4647f0366..22e2ec403 100644 --- a/Tests/DeepLearningTests/LayerTests.swift +++ b/Tests/DeepLearningTests/LayerTests.swift @@ -23,7 +23,19 @@ final class LayerTests: XCTestCase { let input = Tensor([[0, 1, 2, 3, 4], [10, 11, 12, 13, 14]]).expandingShape(at: 2) let output = layer.inferring(from: input) let expected = Tensor([[[1, 4], [2, 7], [3, 10]], [[11, 34], [12, 37], [13, 40]]]) - XCTAssertEqual(round(output), expected) + XCTAssertEqual(output, expected) + } + + func testConv3D() { + let filter = Tensor(shape: [1, 2, 2, 2, 1], scalars: (0..<8).map(Float.init)) + let bias = Tensor([-1, 1]) + let layer = Conv3D(filter: filter, bias: bias, activation: identity, + strides: (1, 2, 1), padding: .valid) + let input = Tensor(shape: [2, 2, 2, 2, 2], scalars: (0..<32).map(Float.init)) + let output = layer.inferring(from: input) + let expected = Tensor(shape: [2, 2, 1, 1, 2], + scalars: [139, 141, 363, 365, 587, 589, 811, 813]) + XCTAssertEqual(output, expected) } func testMaxPool1D() { @@ -68,7 +80,7 @@ final class LayerTests: XCTestCase { func testAvgPool3D() { let layer = AvgPool3D(poolSize: (2, 4, 5), strides: (1, 1, 1), padding: .valid) - let input = Tensor(shape: [1, 2, 4, 5, 1], scalars: (0..<20).map(Float.init)) + let input = Tensor(shape: [1, 2, 4, 5, 1], scalars: (0..<40).map(Float.init)) let output = layer.inferring(from: input) - let expected = Tensor([[[[[9.5]]]]]) + let expected = Tensor([[[[[19.5]]]]]) XCTAssertEqual(output, expected) @@ -187,6 +199,7 @@ final class LayerTests: XCTestCase { static var allTests = [ ("testConv1D", testConv1D), + ("testConv3D", testConv3D), ("testMaxPool1D", testMaxPool1D), ("testMaxPool2D", testMaxPool2D), ("testMaxPool3D", testMaxPool3D),