diff --git a/Sources/DeepLearning/Layer.swift b/Sources/DeepLearning/Layer.swift
index 50d78ad91..6a2f02235 100644
--- a/Sources/DeepLearning/Layer.swift
+++ b/Sources/DeepLearning/Layer.swift
@@ -51,16 +51,16 @@ public extension Layer {
     @differentiating(inferring(from:))
     @usableFromInline
     internal func _vjpInferring(from input: Input)
-        -> (value: Output, pullback: (Output.CotangentVector)
-            -> (CotangentVector, Input.CotangentVector)) {
+        -> (value: Output, pullback: (Output.TangentVector)
+            -> (TangentVector, Input.TangentVector)) {
         return withLearningPhase(LearningPhase.inference) {
             let (output, pullback) = appliedForBackpropagation(to: input)
             return (output, { v in pullback(v) })
         }
     }

-    typealias Backpropagator = (_ direction: Output.CotangentVector)
-        -> (layerGradient: CotangentVector, inputGradient: Input.CotangentVector)
+    typealias Backpropagator = (_ direction: Output.TangentVector)
+        -> (layerGradient: TangentVector, inputGradient: Input.TangentVector)

     /// Returns the inference output and the backpropagation function obtained from applying the
     /// layer to the given input.
@@ -728,7 +728,7 @@ public struct BatchNorm<Scalar: TensorFlowFloatingPoint>: Layer {
     @usableFromInline
     func _vjpApplied(to input: Tensor<Scalar>) ->
         (Tensor<Scalar>, (Tensor<Scalar>) ->
-            (BatchNorm<Scalar>.CotangentVector, Tensor<Scalar>)) {
+            (BatchNorm<Scalar>.TangentVector, Tensor<Scalar>)) {
         switch Context.local.learningPhase {
         case .training:
             return valueWithPullback(at: input) {
@@ -1086,7 +1086,7 @@ public struct Dropout<Scalar: TensorFlowFloatingPoint>: Layer {
     @usableFromInline
     func _vjpApplied(to input: Tensor<Scalar>) ->
         (Tensor<Scalar>, (Tensor<Scalar>) ->
-            (Dropout<Scalar>.CotangentVector, Tensor<Scalar>)) {
+            (Dropout<Scalar>.TangentVector, Tensor<Scalar>)) {
         switch Context.local.learningPhase {
         case .training:
             return valueWithPullback(at: input) {
@@ -1435,8 +1435,8 @@ public struct RNN<Cell: RNNCell>: Layer {
     internal func _vjpCall(
         _ inputs: [Cell.TimeStepInput], initialState: Cell.State
     ) -> ([Cell.TimeStepOutput],
-          (Array<Cell.TimeStepOutput>.CotangentVector)
-              -> (CotangentVector, Array<Cell.TimeStepInput>.CotangentVector)) {
+          (Array<Cell.TimeStepOutput>.TangentVector)
+              -> (TangentVector, Array<Cell.TimeStepInput>.TangentVector)) {
         let timeStepCount = inputs.count
         var currentHiddenState = cell.zeroState
         var timeStepOutputs: [Cell.TimeStepOutput] = []
@@ -1454,9 +1454,9 @@ public struct RNN<Cell: RNNCell>: Layer {
         return (timeStepOutputs, { 𝛁outputs in
             precondition(𝛁outputs.base.count == timeStepCount,
                          "The number of output gradients must equal the number of time steps")
-            var 𝛁cell = Cell.CotangentVector.zero
-            var 𝛁state = Cell.State.CotangentVector.zero
-            var reversed𝛁inputs: [Cell.TimeStepInput.CotangentVector] = []
+            var 𝛁cell = Cell.TangentVector.zero
+            var 𝛁state = Cell.State.TangentVector.zero
+            var reversed𝛁inputs: [Cell.TimeStepInput.TangentVector] = []
             reversed𝛁inputs.reserveCapacity(timeStepCount)
             for (𝛁output, backpropagator) in zip(𝛁outputs.base, backpropagators).reversed() {
                 let (new𝛁cell, 𝛁input) = backpropagator(.init(output: 𝛁output, state: 𝛁state))
diff --git a/Sources/DeepLearning/Optimizer.swift b/Sources/DeepLearning/Optimizer.swift
index 6e0eb2c28..6650b1c3a 100644
--- a/Sources/DeepLearning/Optimizer.swift
+++ b/Sources/DeepLearning/Optimizer.swift
@@ -30,7 +30,7 @@ public protocol Optimizer {
     /// Updates the specified differentiable variables along the specified
     /// direction.
     mutating func update(_ variables: inout Model.AllDifferentiableVariables,
-                         along direction: Model.CotangentVector)
+                         along direction: Model.TangentVector)
 }

 fileprivate extension Tensor where Scalar: Numeric {
@@ -46,7 +46,7 @@ fileprivate extension Tensor where Scalar: Numeric {
 /// Reference: ["Adam - A Method for Stochastic Optimization"](
 /// https://arxiv.org/abs/1412.6980v8)
 public class Adam<Model: Layer>: Optimizer
-    where Model.AllDifferentiableVariables == Model.CotangentVector {
+    where Model.AllDifferentiableVariables == Model.TangentVector {
     /// The learning rate.
     public var learningRate: Float
     /// A coefficient used to calculate the first and second moments of
@@ -142,7 +142,7 @@ public class Adam<Model: Layer>: Optimizer
 /// Reference: ["rmsprop: Divide the gradient by a running average of its recent magnitude"](
 /// http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf)
 public class RMSProp<Model: Layer>: Optimizer
-    where Model.AllDifferentiableVariables == Model.CotangentVector {
+    where Model.AllDifferentiableVariables == Model.TangentVector {
     /// The learning rate.
     public var learningRate: Float
     // TODO: Document `rho`. Keras doesn't document `rho`.
@@ -182,7 +182,7 @@ public class RMSProp<Model: Layer>: Optimizer
     }

     public func update(_ model: inout Model.AllDifferentiableVariables,
-                       along direction: Model.CotangentVector) {
+                       along direction: Model.TangentVector) {
         step += 1
         let learningRate = self.learningRate * 1 / (1 + decay * Float(step))
         for kp in model.recursivelyAllWritableKeyPaths(to: Tensor<Float>.self) {
@@ -206,7 +206,7 @@ public class RMSProp<Model: Layer>: Optimizer
 /// An optimizer that implements stochastic gradient descent, with support for momentum, learning
 /// rate decay, and Nesterov momentum.
 public class SGD<Model: Layer>: Optimizer
-    where Model.AllDifferentiableVariables == Model.CotangentVector {
+    where Model.AllDifferentiableVariables == Model.TangentVector {
     /// The learning rate.
     public var learningRate: Float
     /// The momentum factor. It accelerates stochastic gradient descent in the relevant direction
@@ -246,7 +246,7 @@ public class SGD<Model: Layer>: Optimizer
     }

     public func update(_ model: inout Model.AllDifferentiableVariables,
-                       along direction: Model.CotangentVector) {
+                       along direction: Model.TangentVector) {
         step += 1
         let learningRate = self.learningRate * 1 / (1 + decay * Float(step))
         for kp in model.recursivelyAllWritableKeyPaths(to: Tensor<Float>.self) {
@@ -294,7 +294,7 @@ public class RiemannSGD<Model: Differentiable, Scalar: FloatingPoint>: Optimizer
     }

     public func update(_ model: inout Model.AllDifferentiableVariables,
-                       along direction: Model.CotangentVector) {
-        model = model.moved(along: learningRate * (.zero - model.tangentVector(from: direction)))
+                       along direction: Model.TangentVector) {
+        model = model.moved(along: learningRate * (.zero - direction))
     }
 }
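Not part of the patch above: a minimal sketch of how the renamed associated type is consumed by client code, assuming a Swift for TensorFlow toolchain in which `CotangentVector` has been merged into `TangentVector` and `Differentiable` still exposes a settable `allDifferentiableVariables` property (as the `update(_:along:)` signatures above imply). The `trainingStep` helper and its constraints are hypothetical, written only to show that gradients are now passed as `Model.TangentVector`.

```swift
import DeepLearning  // the module patched above

// Illustrative only: with the rename, a gradient has type `Model.TangentVector`,
// and the optimizers above require `Model.AllDifferentiableVariables ==
// Model.TangentVector`, so it can be fed straight into `update(_:along:)`
// without the old `tangentVector(from:)` conversion.
func trainingStep<Model, Opt: Optimizer>(
    model: inout Model,
    optimizer: inout Opt,
    gradient: Model.TangentVector
) where Opt.Model == Model, Model.AllDifferentiableVariables == Model.TangentVector {
    // The optimizer walks the model's differentiable variables and moves each
    // one along the supplied tangent-vector direction.
    optimizer.update(&model.allDifferentiableVariables, along: gradient)
}
```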