diff --git a/Sources/DeepLearning/Layer.swift b/Sources/DeepLearning/Layer.swift
index 50d78ad91..6a2f02235 100644
--- a/Sources/DeepLearning/Layer.swift
+++ b/Sources/DeepLearning/Layer.swift
@@ -51,16 +51,16 @@ public extension Layer {
     @differentiating(inferring(from:))
     @usableFromInline
     internal func _vjpInferring(from input: Input)
-        -> (value: Output, pullback: (Output.CotangentVector)
-            -> (CotangentVector, Input.CotangentVector)) {
+        -> (value: Output, pullback: (Output.TangentVector)
+            -> (TangentVector, Input.TangentVector)) {
         return withLearningPhase(LearningPhase.inference) {
             let (output, pullback) = appliedForBackpropagation(to: input)
             return (output, { v in pullback(v) })
         }
     }

-    typealias Backpropagator = (_ direction: Output.CotangentVector)
-        -> (layerGradient: CotangentVector, inputGradient: Input.CotangentVector)
+    typealias Backpropagator = (_ direction: Output.TangentVector)
+        -> (layerGradient: TangentVector, inputGradient: Input.TangentVector)

     /// Returns the inference output and the backpropagation function obtained from applying the
     /// layer to the given input.
@@ -728,7 +728,7 @@ public struct BatchNorm<Scalar: TensorFlowFloatingPoint>: Layer {
     @usableFromInline
     func _vjpApplied(to input: Tensor<Scalar>) ->
         (Tensor<Scalar>, (Tensor<Scalar>) ->
-            (BatchNorm<Scalar>.CotangentVector, Tensor<Scalar>)) {
+            (BatchNorm<Scalar>.TangentVector, Tensor<Scalar>)) {
         switch Context.local.learningPhase {
         case .training:
             return valueWithPullback(at: input) {
@@ -1086,7 +1086,7 @@ public struct Dropout<Scalar: TensorFlowFloatingPoint>: Layer {
     @usableFromInline
     func _vjpApplied(to input: Tensor<Scalar>) ->
         (Tensor<Scalar>, (Tensor<Scalar>) ->
-            (Dropout<Scalar>.CotangentVector, Tensor<Scalar>)) {
+            (Dropout<Scalar>.TangentVector, Tensor<Scalar>)) {
         switch Context.local.learningPhase {
         case .training:
             return valueWithPullback(at: input) {
@@ -1435,8 +1435,8 @@ public struct RNN<Cell: RNNCell>: Layer {
     internal func _vjpCall(
         _ inputs: [Cell.TimeStepInput], initialState: Cell.State
     ) -> ([Cell.TimeStepOutput],
-          (Array<Cell.TimeStepOutput>.CotangentVector)
-              -> (CotangentVector, Array<Cell.TimeStepInput>.CotangentVector)) {
+          (Array<Cell.TimeStepOutput>.TangentVector)
+              -> (TangentVector, Array<Cell.TimeStepInput>.TangentVector)) {
         let timeStepCount = inputs.count
         var currentHiddenState = cell.zeroState
         var timeStepOutputs: [Cell.TimeStepOutput] = []
@@ -1454,9 +1454,9 @@ public struct RNN<Cell: RNNCell>: Layer {
         return (timeStepOutputs, { 𝛁outputs in
             precondition(𝛁outputs.base.count == timeStepCount,
                          "The number of output gradients must equal the number of time steps")
-            var 𝛁cell = Cell.CotangentVector.zero
-            var 𝛁state = Cell.State.CotangentVector.zero
-            var reversed𝛁inputs: [Cell.TimeStepInput.CotangentVector] = []
+            var 𝛁cell = Cell.TangentVector.zero
+            var 𝛁state = Cell.State.TangentVector.zero
+            var reversed𝛁inputs: [Cell.TimeStepInput.TangentVector] = []
             reversed𝛁inputs.reserveCapacity(timeStepCount)
             for (𝛁output, backpropagator) in zip(𝛁outputs.base, backpropagators).reversed() {
                 let (new𝛁cell, 𝛁input) = backpropagator(.init(output: 𝛁output, state: 𝛁state))
diff --git a/Sources/DeepLearning/Optimizer.swift b/Sources/DeepLearning/Optimizer.swift
index 6e0eb2c28..6650b1c3a 100644
--- a/Sources/DeepLearning/Optimizer.swift
+++ b/Sources/DeepLearning/Optimizer.swift
@@ -30,7 +30,7 @@ public protocol Optimizer {
     /// Updates the specified differentiable variables along the specified
     /// direction.
     mutating func update(_ variables: inout Model.AllDifferentiableVariables,
-                         along direction: Model.CotangentVector)
+                         along direction: Model.TangentVector)
 }

 fileprivate extension Tensor where Scalar: Numeric {
@@ -46,7 +46,7 @@ fileprivate extension Tensor where Scalar: Numeric {
 /// Reference: ["Adam - A Method for Stochastic Optimization"](
 /// https://arxiv.org/abs/1412.6980v8)
 public class Adam<Model: Layer>: Optimizer
-    where Model.AllDifferentiableVariables == Model.CotangentVector {
+    where Model.AllDifferentiableVariables == Model.TangentVector {
     /// The learning rate.
     public var learningRate: Float
     /// A coefficient used to calculate the first and second moments of
@@ -142,7 +142,7 @@ public class Adam<Model: Layer>: Optimizer
 /// Reference: ["rmsprop: Divide the gradient by a running average of its recent magnitude"](
 /// http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf)
 public class RMSProp<Model: Layer>: Optimizer
-    where Model.AllDifferentiableVariables == Model.CotangentVector {
+    where Model.AllDifferentiableVariables == Model.TangentVector {
     /// The learning rate.
     public var learningRate: Float
     // TODO: Document `rho`. Keras doesn't document `rho`.
@@ -182,7 +182,7 @@ public class RMSProp<Model: Layer>: Optimizer
     }

     public func update(_ model: inout Model.AllDifferentiableVariables,
-                       along direction: Model.CotangentVector) {
+                       along direction: Model.TangentVector) {
         step += 1
         let learningRate = self.learningRate * 1 / (1 + decay * Float(step))
         for kp in model.recursivelyAllWritableKeyPaths(to: Tensor<Float>.self) {
@@ -206,7 +206,7 @@ public class RMSProp<Model: Layer>: Optimizer
 /// An optimizer that implements stochastic gradient descent, with support for momentum, learning
 /// rate decay, and Nesterov momentum.
 public class SGD<Model: Layer>: Optimizer
-    where Model.AllDifferentiableVariables == Model.CotangentVector {
+    where Model.AllDifferentiableVariables == Model.TangentVector {
     /// The learning rate.
     public var learningRate: Float
     /// The momentum factor. It accelerates stochastic gradient descent in the relevant direction
@@ -246,7 +246,7 @@ public class SGD<Model: Layer>: Optimizer
     }

     public func update(_ model: inout Model.AllDifferentiableVariables,
-                       along direction: Model.CotangentVector) {
+                       along direction: Model.TangentVector) {
         step += 1
         let learningRate = self.learningRate * 1 / (1 + decay * Float(step))
         for kp in model.recursivelyAllWritableKeyPaths(to: Tensor<Float>.self) {
@@ -294,7 +294,7 @@ public class RiemannSGD<Model: Differentiable, Scalar: FloatingPoint>: Optimizer
     }

     public func update(_ model: inout Model.AllDifferentiableVariables,
-                       along direction: Model.CotangentVector) {
-        model = model.moved(along: learningRate * (.zero - model.tangentVector(from: direction)))
+                       along direction: Model.TangentVector) {
+        model = model.moved(along: learningRate * (.zero - direction))
     }
 }
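Not part of the patch above: a minimal sketch of how the renamed associated type is consumed by client code, assuming a Swift for TensorFlow toolchain in which `CotangentVector` has been merged into `TangentVector` and `Differentiable` still exposes a settable `allDifferentiableVariables` property (as the `update(_:along:)` signatures above imply). The `trainingStep` helper and its constraints are hypothetical, written only to show that gradients are now passed as `Model.TangentVector`.

```swift
import DeepLearning  // the module patched above

// Illustrative only: with the rename, a gradient has type `Model.TangentVector`,
// and the optimizers above require `Model.AllDifferentiableVariables ==
// Model.TangentVector`, so it can be fed straight into `update(_:along:)`
// without the old `tangentVector(from:)` conversion.
func trainingStep<Model, Opt: Optimizer>(
    model: inout Model,
    optimizer: inout Opt,
    gradient: Model.TangentVector
) where Opt.Model == Model, Model.AllDifferentiableVariables == Model.TangentVector {
    // The optimizer walks the model's differentiable variables and moves each
    // one along the supplied tangent-vector direction.
    optimizer.update(&model.allDifferentiableVariables, along: gradient)
}
```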