From be0fc5e09db39a6f589870cd3269d6d31038eb4d Mon Sep 17 00:00:00 2001
From: Brett Koonce
Date: Wed, 8 May 2019 21:12:17 -0500
Subject: [PATCH 1/5] rebuild resnet using blocks

---
 ResNet/Helpers.swift  |  51 +++++
 ResNet/ResNet.swift   | 348 ++++++++++++++++++++++++++++++++
 ResNet/ResNet50.swift | 452 ------------------------------------------
 ResNet/main.swift     |   5 +-
 4 files changed, 401 insertions(+), 455 deletions(-)
 create mode 100644 ResNet/Helpers.swift
 create mode 100644 ResNet/ResNet.swift
 delete mode 100644 ResNet/ResNet50.swift

diff --git a/ResNet/Helpers.swift b/ResNet/Helpers.swift
new file mode 100644
index 00000000000..2101c31e893
--- /dev/null
+++ b/ResNet/Helpers.swift
@@ -0,0 +1,51 @@
+// Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// TODO: Remove this when it's moved to the standard library.
+extension Array where Element: Differentiable {
+    @differentiable(wrt: (self, initialResult), vjp: reduceDerivative)
+    func differentiableReduce<Result: Differentiable>(
+        _ initialResult: Result,
+        _ nextPartialResult: @differentiable (Result, Element) -> Result
+    ) -> Result {
+        return reduce(initialResult, nextPartialResult)
+    }
+
+    func reduceDerivative<Result: Differentiable>(
+        _ initialResult: Result,
+        _ nextPartialResult: @differentiable (Result, Element) -> Result
+    ) -> (Result, (Result.CotangentVector) -> (Array.CotangentVector, Result.CotangentVector)) {
+        var pullbacks: [(Result.CotangentVector)
+            -> (Result.CotangentVector, Element.CotangentVector)] = []
+        let count = self.count
+        pullbacks.reserveCapacity(count)
+        var result = initialResult
+        for element in self {
+            let (y, pb) = Swift.valueWithPullback(at: result, element, in: nextPartialResult)
+            result = y
+            pullbacks.append(pb)
+        }
+        return (value: result, pullback: { cotangent in
+            var resultCotangent = cotangent
+            var elementCotangents = CotangentVector([])
+            elementCotangents.base.reserveCapacity(count)
+            for pullback in pullbacks.reversed() {
+                let (newResultCotangent, elementCotangent) = pullback(resultCotangent)
+                resultCotangent = newResultCotangent
+                elementCotangents.base.append(elementCotangent)
+            }
+            return (CotangentVector(elementCotangents.base.reversed()), resultCotangent)
+        })
+    }
+}
diff --git a/ResNet/ResNet.swift b/ResNet/ResNet.swift
new file mode 100644
index 00000000000..ab56ab12daf
--- /dev/null
+++ b/ResNet/ResNet.swift
@@ -0,0 +1,348 @@
+// Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+import TensorFlow
+
+// Original Paper:
+// "Deep Residual Learning for Image Recognition"
+// Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
+// https://arxiv.org/abs/1512.03385
+// using shortcut layer to connect BasicBlock layers (aka Option (B))
+
+struct ConvBN: Layer {
+    typealias Input = Tensor<Float>
+    typealias Output = Tensor<Float>
+
+    var conv: Conv2D<Float>
+    var norm: BatchNorm<Float>
+
+    init(
+        filterShape: (Int, Int, Int, Int),
+        strides: (Int, Int) = (1, 1),
+        padding: Padding = .valid
+    ) {
+        self.conv = Conv2D(filterShape: filterShape, strides: strides, padding: padding)
+        self.norm = BatchNorm(featureCount: filterShape.3)
+    }
+
+    @differentiable
+    func call(_ input: Input) -> Output {
+        return input.sequenced(through: conv, norm)
+    }
+}
+
+struct ResidualBasicBlockShortcut: Layer {
+    typealias Input = Tensor<Float>
+    typealias Output = Tensor<Float>
+
+    var layer1: ConvBN
+    var layer2: ConvBN
+    var shortcut: ConvBN
+
+    init(featureCounts: (Int, Int, Int, Int), kernelSize: Int = 3) {
+        self.layer1 = ConvBN(
+            filterShape: (kernelSize, kernelSize, featureCounts.0, featureCounts.1),
+            strides: (2, 2),
+            padding: .same)
+        self.layer2 = ConvBN(
+            filterShape: (kernelSize, kernelSize, featureCounts.1, featureCounts.2),
+            strides: (1, 1),
+            padding: .same)
+        self.shortcut = ConvBN(
+            filterShape: (1, 1, featureCounts.0, featureCounts.3),
+            strides: (2, 2),
+            padding: .same)
+    }
+
+    @differentiable
+    func call(_ input: Input) -> Output {
+        return layer2(relu(layer1(input))) + shortcut(input)
+    }
+}
+
+struct ResidualBasicBlock: Layer {
+    typealias Input = Tensor<Float>
+    typealias Output = Tensor<Float>
+
+    var layer1: ConvBN
+    var layer2: ConvBN
+
+    init(
+        featureCounts: (Int, Int, Int, Int),
+        kernelSize: Int = 3,
+        strides: (Int, Int) = (1, 1)
+    ) {
+        self.layer1 = ConvBN(
+            filterShape: (kernelSize, kernelSize, featureCounts.0, featureCounts.1),
+            strides: strides,
+            padding: .same)
+        self.layer2 = ConvBN(
+            filterShape: (kernelSize, kernelSize, featureCounts.1, featureCounts.3),
+            strides: strides,
+            padding: .same)
+    }
+
+    @differentiable
+    func call(_ input: Input) -> Output {
+        return layer2(relu(layer1(input)))
+    }
+}
+
+struct ResidualBasicBlockStack: Layer {
+    typealias Input = Tensor<Float>
+    typealias Output = Tensor<Float>
+
+    var blocks: [ResidualBasicBlock] = []
+    init(featureCounts: (Int, Int, Int, Int), kernelSize: Int = 3, blockCount: Int) {
+        for _ in 1..<blockCount {
+            blocks.append(ResidualBasicBlock(featureCounts: featureCounts, kernelSize: kernelSize))
+        }
+    }
+
+    @differentiable
+    func call(_ input: Input) -> Output {
+        let blocksReduced = blocks.differentiableReduce(input) { last, layer in
+            layer(last)
+        }
+        return blocksReduced
+    }
+}
+
+struct ResidualConvBlock: Layer {
+    typealias Input = Tensor<Float>
+    typealias Output = Tensor<Float>
+
+    var layer1: ConvBN
+    var layer2: ConvBN
+    var layer3: ConvBN
+    var shortcut: ConvBN
+
+    init(
+        featureCounts: (Int, Int, Int, Int),
+        kernelSize: Int = 3,
+        strides: (Int, Int) = (2, 2)
+    ) {
+        self.layer1 = ConvBN(
+            filterShape: (1, 1, featureCounts.0, featureCounts.1),
+            strides: strides)
+        self.layer2 = ConvBN(
+            filterShape: (kernelSize, kernelSize, featureCounts.1, featureCounts.2),
+            padding: .same)
+        self.layer3 = ConvBN(filterShape: (1, 1, featureCounts.2, featureCounts.3))
+        self.shortcut = ConvBN(
+            filterShape: (1, 1, featureCounts.0, featureCounts.3),
+            strides: strides,
+            padding: .same)
+    }
+
+    @differentiable
+    func call(_ input: Input) -> Output {
+        let tmp = relu(layer2(relu(layer1(input))))
+        return relu(layer3(tmp) + shortcut(input))
+    }
+}
+
+struct ResidualIdentityBlock: Layer {
+    typealias Input = Tensor<Float>
+    typealias Output = Tensor<Float>
+
+    var layer1: ConvBN
+    var layer2: ConvBN
+    var layer3: ConvBN
+
+    init(featureCounts: (Int, Int, Int, Int), kernelSize: Int = 3) {
+        self.layer1 = ConvBN(filterShape: (1, 1, featureCounts.0, featureCounts.1))
+        self.layer2 = ConvBN(
+            filterShape: (kernelSize, kernelSize, featureCounts.1, featureCounts.2),
+            padding: .same)
+        self.layer3 = ConvBN(filterShape: (1, 1, featureCounts.2, featureCounts.3))
+    }
+
+    @differentiable
+    func call(_ input: Input) -> Output {
+        let tmp = relu(layer2(relu(layer1(input))))
+        return relu(layer3(tmp) + input)
+    }
+}
+
+struct ResidualIdentityBlockStack: Layer {
+    typealias Input = Tensor<Float>
+    typealias Output = Tensor<Float>
+
+    var blocks: [ResidualIdentityBlock] = []
+    init(featureCounts: (Int, Int, Int, Int), kernelSize: Int = 3, blockCount: Int) {
+        for _ in 1..<blockCount {
+            blocks.append(ResidualIdentityBlock(featureCounts: featureCounts, kernelSize: kernelSize))
+        }
+    }
+
+    @differentiable
+    func call(_ input: Input) -> Output {
+        let blocksReduced = blocks.differentiableReduce(input) { last, layer in
+            layer(last)
+        }
+        return blocksReduced
+    }
+}
+
+struct ResNetBasic: Layer {
+    typealias Input = Tensor<Float>
+    typealias Output = Tensor<Float>
+
+    var l1: ConvBN
+    var maxPool: MaxPool2D<Float>
+
+    var l2a = ResidualBasicBlock(featureCounts: (64, 64, 64, 64))
+    var l2b: ResidualBasicBlockStack
+
+    var l3a = ResidualBasicBlockShortcut(featureCounts: (64, 128, 128, 128))
+    var l3b: ResidualBasicBlockStack
+
+    var l4a = ResidualBasicBlockShortcut(featureCounts: (128, 256, 256, 256))
+    var l4b: ResidualBasicBlockStack
+
+    var l5a = ResidualBasicBlockShortcut(featureCounts: (256, 512, 512, 512))
+    var l5b: ResidualBasicBlockStack
+
+    var avgPool: AvgPool2D<Float>
+    var flatten = Flatten<Float>()
+    var classifier: Dense<Float>
+
+    init(imageSize: Int, classCount: Int, layerBlockCounts: (Int, Int, Int, Int)) {
+        // default to the ImageNet case where imageSize == 224
+        // Swift requires that all properties get initialized outside control flow
+        l1 = ConvBN(filterShape: (7, 7, 3, 64), strides: (2, 2), padding: .same)
+        maxPool = MaxPool2D(poolSize: (3, 3), strides: (2, 2))
+        avgPool = AvgPool2D(poolSize: (7, 7), strides: (7, 7))
+        if imageSize == 32 {
+            l1 = ConvBN(filterShape: (3, 3, 3, 64), padding: .same)
+            maxPool = MaxPool2D(poolSize: (1, 1), strides: (1, 1)) // no-op
+            avgPool = AvgPool2D(poolSize: (4, 4), strides: (4, 4))
+        }
+
+        l2b = ResidualBasicBlockStack(featureCounts: (64, 64, 64, 64),
+                                      blockCount: layerBlockCounts.0)
+        l3b = ResidualBasicBlockStack(featureCounts: (128, 128, 128, 128),
+                                      blockCount: layerBlockCounts.1)
+        l4b = ResidualBasicBlockStack(featureCounts: (256, 256, 256, 256),
+                                      blockCount: layerBlockCounts.2)
+        l5b = ResidualBasicBlockStack(featureCounts: (512, 512, 512, 512),
+                                      blockCount: layerBlockCounts.3)
+
+        classifier = Dense(inputSize: 512, outputSize: classCount)
+    }
+
+    @differentiable
+    func call(_ input: Input) -> Output {
+        let inputLayer = maxPool(relu(l1(input)))
+        let level2 = inputLayer.sequenced(through: l2a, l2b)
+        let level3 = level2.sequenced(through: l3a, l3b)
+        let level4 = level3.sequenced(through: l4a, l4b)
+        let level5 = level4.sequenced(through: l5a, l5b)
+        return level5.sequenced(through: avgPool, flatten, classifier)
+    }
+}
+
+extension ResNetBasic {
+    enum Kind {
+        case resNet18
+        case resNet34
+    }
+
+    init(kind: Kind, imageSize: Int, classCount: Int) {
+        switch kind {
+        case .resNet18:
+            self.init(imageSize: imageSize, classCount: classCount, layerBlockCounts: (2, 2, 2, 2))
+        case .resNet34:
+            self.init(imageSize: imageSize, classCount: classCount, layerBlockCounts: (3, 4, 6, 3))
+        }
+    }
+}
+
+struct ResNet: Layer {
+    typealias Input = Tensor<Float>
+    typealias Output = Tensor<Float>
+
+    var l1: ConvBN
+    var maxPool: MaxPool2D<Float>
+
+    var l2a = ResidualConvBlock(featureCounts: (64, 64, 64, 256), strides: (1, 1))
+    var l2b: ResidualIdentityBlockStack
+
+    var l3a = ResidualConvBlock(featureCounts: (256, 128, 128, 512))
+    var l3b: ResidualIdentityBlockStack
+
+    var l4a = ResidualConvBlock(featureCounts: (512, 256, 256, 1024))
+    var l4b: ResidualIdentityBlockStack
+
+    var l5a = ResidualConvBlock(featureCounts: (1024, 512, 512, 2048))
+    var l5b: ResidualIdentityBlockStack
+
+    var avgPool: AvgPool2D<Float>
+    var flatten = Flatten<Float>()
+    var classifier: Dense<Float>
+
+    init(imageSize: Int, classCount: Int, layerBlockCounts: (Int, Int, Int, Int)) {
+        // default to the ImageNet case where imageSize == 224
+        // Swift requires that all properties get initialized outside control flow
+        l1 = ConvBN(filterShape: (7, 7, 3, 64), strides: (2, 2), padding: .same)
+        maxPool = MaxPool2D(poolSize: (3, 3), strides: (2, 2))
+        avgPool = AvgPool2D(poolSize: (7, 7), strides: (7, 7))
+        if imageSize == 32 {
+            l1 = ConvBN(filterShape: (3, 3, 3, 64), padding: .same)
+            maxPool = MaxPool2D(poolSize: (1, 1), strides: (1, 1)) // no-op
+            avgPool = AvgPool2D(poolSize: (4, 4), strides: (4, 4))
+        }
+
+        l2b = ResidualIdentityBlockStack(featureCounts: (256, 64, 64, 256),
+                                         blockCount: layerBlockCounts.0)
+        l3b = ResidualIdentityBlockStack(featureCounts: (512, 128, 128, 512),
+                                         blockCount: layerBlockCounts.1)
+        l4b = ResidualIdentityBlockStack(featureCounts: (1024, 256, 256, 1024),
+                                         blockCount: layerBlockCounts.2)
+        l5b = ResidualIdentityBlockStack(featureCounts: (2048, 512, 512, 2048),
+                                         blockCount: layerBlockCounts.3)
+
+        classifier = Dense(inputSize: 2048, outputSize: classCount)
+    }
+
+    @differentiable
+    func call(_ input: Input) -> Output {
+        let inputLayer = maxPool(relu(l1(input)))
+        let level2 = inputLayer.sequenced(through: l2a, l2b)
+        let level3 = level2.sequenced(through: l3a, l3b)
+        let level4 = level3.sequenced(through: l4a, l4b)
+        let level5 = level4.sequenced(through: l5a, l5b)
+        return level5.sequenced(through: avgPool, flatten, classifier)
+    }
+}
+
+extension ResNet {
+    enum Kind {
+        case resNet50
+        case resNet101
+        case resNet152
+    }
+
+    init(kind: Kind, imageSize: Int, classCount: Int) {
+        switch kind {
+        case .resNet50:
+            self.init(imageSize: imageSize, classCount: classCount, layerBlockCounts: (3, 4, 6, 3))
+        case .resNet101:
+            self.init(imageSize: imageSize, classCount: classCount, layerBlockCounts: (3, 4, 23, 3))
+        case .resNet152:
+            self.init(imageSize: imageSize, classCount: classCount, layerBlockCounts: (3, 8, 36, 3))
+        }
+    }
+}
diff --git a/ResNet/ResNet50.swift b/ResNet/ResNet50.swift
deleted file mode 100644
index 487cee5eded..00000000000
--- a/ResNet/ResNet50.swift
+++ /dev/null
@@ -1,452 +0,0 @@
-// Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
- -import TensorFlow - -// Original Paper: -// "Deep Residual Learning for Image Recognition" -// Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun -// https://arxiv.org/abs/1512.03385 -// using shortcut layer to connect BasicBlock layers (aka Option (B)) - -struct ConvBN: Layer { - typealias Input = Tensor - typealias Output = Tensor - - var conv: Conv2D - var norm: BatchNorm - - init( - filterShape: (Int, Int, Int, Int), - strides: (Int, Int) = (1, 1), - padding: Padding = .valid - ) { - self.conv = Conv2D(filterShape: filterShape, strides: strides, padding: padding) - self.norm = BatchNorm(featureCount: filterShape.3) - } - - @differentiable - func call(_ input: Input) -> Output { - return input.sequenced(through: conv, norm) - } -} - -struct ResidualBasicBlock: Layer { - typealias Input = Tensor - typealias Output = Tensor - - var layer1: ConvBN - var layer2: ConvBN - - init( - featureCounts: (Int, Int, Int, Int), - kernelSize: Int = 3, - strides: (Int, Int) = (1, 1) - ) { - self.layer1 = ConvBN( - filterShape: (kernelSize, kernelSize, featureCounts.0, featureCounts.1), - strides: strides, - padding: .same) - self.layer2 = ConvBN( - filterShape: (kernelSize, kernelSize, featureCounts.1, featureCounts.3), - strides: strides, - padding: .same) - } - - @differentiable - func call(_ input: Input) -> Output { - return layer2(relu(layer1(input))) - } -} - -struct ResidualBasicBlockShortcut: Layer { - typealias Input = Tensor - typealias Output = Tensor - - var layer1: ConvBN - var layer2: ConvBN - var shortcut: ConvBN - - init(featureCounts: (Int, Int, Int, Int), kernelSize: Int = 3) { - self.layer1 = ConvBN( - filterShape: (kernelSize, kernelSize, featureCounts.0, featureCounts.1), - strides: (2, 2), - padding: .same) - self.layer2 = ConvBN( - filterShape: (kernelSize, kernelSize, featureCounts.1, featureCounts.2), - strides: (1, 1), - padding: .same) - self.shortcut = ConvBN( - filterShape: (1, 1, featureCounts.0, featureCounts.3), - strides: (2, 2), - padding: .same) - } - - @differentiable - func call(_ input: Input) -> Output { - return layer2(relu(layer1(input))) + shortcut(input) - } -} - -struct ResidualConvBlock: Layer { - typealias Input = Tensor - typealias Output = Tensor - - var layer1: ConvBN - var layer2: ConvBN - var layer3: ConvBN - var shortcut: ConvBN - - init( - featureCounts: (Int, Int, Int, Int), - kernelSize: Int = 3, - strides: (Int, Int) = (2, 2) - ) { - self.layer1 = ConvBN( - filterShape: (1, 1, featureCounts.0, featureCounts.1), - strides: strides) - self.layer2 = ConvBN( - filterShape: (kernelSize, kernelSize, featureCounts.1, featureCounts.2), - padding: .same) - self.layer3 = ConvBN(filterShape: (1, 1, featureCounts.2, featureCounts.3)) - self.shortcut = ConvBN( - filterShape: (1, 1, featureCounts.0, featureCounts.3), - strides: strides, - padding: .same) - } - - @differentiable - func call(_ input: Input) -> Output { - let tmp = relu(layer2(relu(layer1(input)))) - return relu(layer3(tmp) + shortcut(input)) - } -} - -struct ResidualIdentityBlock: Layer { - typealias Input = Tensor - typealias Output = Tensor - - var layer1: ConvBN - var layer2: ConvBN - var layer3: ConvBN - - init(featureCounts: (Int, Int, Int, Int), kernelSize: Int = 3) { - self.layer1 = ConvBN(filterShape: (1, 1, featureCounts.0, featureCounts.1)) - self.layer2 = ConvBN( - filterShape: (kernelSize, kernelSize, featureCounts.1, featureCounts.2), - padding: .same) - self.layer3 = ConvBN(filterShape: (1, 1, featureCounts.2, featureCounts.3)) - } - - @differentiable - func call(_ input: Input) -> 
Output { - let tmp = relu(layer2(relu(layer1(input)))) - return relu(layer3(tmp) + input) - } -} - -struct ResidualIdentityBlockStack: Layer { - typealias Input = Tensor - typealias Output = Tensor - - var block1: ResidualIdentityBlock - var block2: ResidualIdentityBlock - var block3: ResidualIdentityBlock - var block4: ResidualIdentityBlock - var block5: ResidualIdentityBlock - - init(featureCounts: (Int, Int, Int, Int), kernelSize: Int = 3) { - self.block1 = ResidualIdentityBlock(featureCounts: featureCounts, kernelSize: kernelSize) - self.block2 = ResidualIdentityBlock(featureCounts: featureCounts, kernelSize: kernelSize) - self.block3 = ResidualIdentityBlock(featureCounts: featureCounts, kernelSize: kernelSize) - self.block4 = ResidualIdentityBlock(featureCounts: featureCounts, kernelSize: kernelSize) - self.block5 = ResidualIdentityBlock(featureCounts: featureCounts, kernelSize: kernelSize) - } - - @differentiable - func call(_ input: Input) -> Output { - return input.sequenced(through: block1, block2, block3, block4, block5) - } -} - -struct ResNet18: Layer { - typealias Input = Tensor - typealias Output = Tensor - - var l1: ConvBN - var maxPool: MaxPool2D - - var l2a = ResidualBasicBlock(featureCounts: (64, 64, 64, 64)) - var l2b = ResidualBasicBlock(featureCounts: (64, 64, 64, 64)) - - var l3a = ResidualBasicBlockShortcut(featureCounts: (64, 128, 128, 128)) - var l3b = ResidualBasicBlock(featureCounts: (128, 128, 128, 128)) - - var l4a = ResidualBasicBlockShortcut(featureCounts: (128, 256, 256, 256)) - var l4b = ResidualBasicBlock(featureCounts: (256, 256, 256, 256)) - - var l5a = ResidualBasicBlockShortcut(featureCounts: (256, 512, 512, 512)) - var l5b = ResidualBasicBlock(featureCounts: (512, 512, 512, 512)) - - var avgPool: AvgPool2D - var flatten = Flatten() - var classifier: Dense - - init(imageSize: Int, classCount: Int) { - // default to the ImageNet case where imageSize == 224 - // Swift requires that all properties get initialized outside control flow - l1 = ConvBN(filterShape: (7, 7, 3, 64), strides: (2, 2), padding: .same) - maxPool = MaxPool2D(poolSize: (3, 3), strides: (2, 2)) - avgPool = AvgPool2D(poolSize: (7, 7), strides: (7, 7)) - if imageSize == 32 { - l1 = ConvBN(filterShape: (3, 3, 3, 64), padding: .same) - maxPool = MaxPool2D(poolSize: (1, 1), strides: (1, 1)) // no-op - avgPool = AvgPool2D(poolSize: (4, 4), strides: (4, 4)) - } - classifier = Dense(inputSize: 512, outputSize: classCount) - } - - @differentiable - func call(_ input: Input) -> Output { - let inputLayer = maxPool(relu(l1(input))) - let level2 = inputLayer.sequenced(through: l2a, l2b) - let level3 = level2.sequenced(through: l3a, l3b) - let level4 = level3.sequenced(through: l4a, l4b) - let level5 = level4.sequenced(through: l5a, l5b) - return level5.sequenced(through: avgPool, flatten, classifier) - } -} - -struct ResNet34: Layer { - typealias Input = Tensor - typealias Output = Tensor - - var l1: ConvBN - var maxPool: MaxPool2D - - var l2a = ResidualBasicBlock(featureCounts: (64, 64, 64, 64)) - var l2b = ResidualBasicBlock(featureCounts: (64, 64, 64, 64)) - var l2c = ResidualBasicBlock(featureCounts: (64, 64, 64, 64)) - - var l3a = ResidualBasicBlockShortcut(featureCounts: (64, 128, 128, 128)) - var l3b = ResidualBasicBlock(featureCounts: (128, 128, 128, 128)) - var l3c = ResidualBasicBlock(featureCounts: (128, 128, 128, 128)) - var l3d = ResidualBasicBlock(featureCounts: (128, 128, 128, 128)) - - var l4a = ResidualBasicBlockShortcut(featureCounts: (128, 256, 256, 256)) - var l4b = 
ResidualBasicBlock(featureCounts: (256, 256, 256, 256)) - var l4c = ResidualBasicBlock(featureCounts: (256, 256, 256, 256)) - var l4d = ResidualBasicBlock(featureCounts: (256, 256, 256, 256)) - var l4e = ResidualBasicBlock(featureCounts: (256, 256, 256, 256)) - var l4f = ResidualBasicBlock(featureCounts: (256, 256, 256, 256)) - - var l5a = ResidualBasicBlockShortcut(featureCounts: (256, 512, 512, 512)) - var l5b = ResidualBasicBlock(featureCounts: (512, 512, 512, 512)) - var l5c = ResidualBasicBlock(featureCounts: (512, 512, 512, 512)) - - var avgPool: AvgPool2D - var flatten = Flatten() - var classifier: Dense - - init(imageSize: Int, classCount: Int) { - // default to the ImageNet case where imageSize == 224 - // Swift requires that all properties get initialized outside control flow - l1 = ConvBN(filterShape: (7, 7, 3, 64), strides: (2, 2), padding: .same) - maxPool = MaxPool2D(poolSize: (3, 3), strides: (2, 2)) - avgPool = AvgPool2D(poolSize: (7, 7), strides: (7, 7)) - if imageSize == 32 { - l1 = ConvBN(filterShape: (3, 3, 3, 64), padding: .same) - maxPool = MaxPool2D(poolSize: (1, 1), strides: (1, 1)) // no-op - avgPool = AvgPool2D(poolSize: (4, 4), strides: (4, 4)) - } - classifier = Dense(inputSize: 512, outputSize: classCount) - } - - @differentiable - func call(_ input: Input) -> Output { - let inputLayer = maxPool(relu(l1(input))) - let level2 = inputLayer.sequenced(through: l2a, l2b, l2c) - let level3 = level2.sequenced(through: l3a, l3b, l3c, l3d) - let level4 = level3.sequenced(through: l4a, l4b, l4c, l4d, l4e, l4f) - let level5 = level4.sequenced(through: l5a, l5b, l5c) - return level5.sequenced(through: avgPool, flatten, classifier) - } -} - -struct ResNet50: Layer { - typealias Input = Tensor - typealias Output = Tensor - - var l1: ConvBN - var maxPool: MaxPool2D - - var l2a = ResidualConvBlock(featureCounts: (64, 64, 64, 256), strides: (1, 1)) - var l2b = ResidualIdentityBlock(featureCounts: (256, 64, 64, 256)) - var l2c = ResidualIdentityBlock(featureCounts: (256, 64, 64, 256)) - - var l3a = ResidualConvBlock(featureCounts: (256, 128, 128, 512)) - var l3b = ResidualIdentityBlock(featureCounts: (512, 128, 128, 512)) - var l3c = ResidualIdentityBlock(featureCounts: (512, 128, 128, 512)) - var l3d = ResidualIdentityBlock(featureCounts: (512, 128, 128, 512)) - - var l4a = ResidualConvBlock(featureCounts: (512, 256, 256, 1024)) - var l4b = ResidualIdentityBlockStack(featureCounts: (1024, 256, 256, 1024)) - - var l5a = ResidualConvBlock(featureCounts: (1024, 512, 512, 2048)) - var l5b = ResidualIdentityBlock(featureCounts: (2048, 512, 512, 2048)) - var l5c = ResidualIdentityBlock(featureCounts: (2048, 512, 512, 2048)) - - var avgPool: AvgPool2D - var flatten = Flatten() - var classifier: Dense - - init(imageSize: Int, classCount: Int) { - // default to the ImageNet case where imageSize == 224 - // Swift requires that all properties get initialized outside control flow - l1 = ConvBN(filterShape: (7, 7, 3, 64), strides: (2, 2), padding: .same) - maxPool = MaxPool2D(poolSize: (3, 3), strides: (2, 2)) - avgPool = AvgPool2D(poolSize: (7, 7), strides: (7, 7)) - if imageSize == 32 { - l1 = ConvBN(filterShape: (3, 3, 3, 64), padding: .same) - maxPool = MaxPool2D(poolSize: (1, 1), strides: (1, 1)) // no-op - avgPool = AvgPool2D(poolSize: (4, 4), strides: (4, 4)) - } - classifier = Dense(inputSize: 2048, outputSize: classCount) - } - - @differentiable - func call(_ input: Input) -> Output { - let inputLayer = maxPool(relu(l1(input))) - let level2 = inputLayer.sequenced(through: l2a, l2b, 
l2c) - let level3 = level2.sequenced(through: l3a, l3b, l3c, l3d) - let level4 = level3.sequenced(through: l4a, l4b) - let level5 = level4.sequenced(through: l5a, l5b, l5c) - return level5.sequenced(through: avgPool, flatten, classifier) - } -} - -struct ResNet101: Layer { - typealias Input = Tensor - typealias Output = Tensor - - var l1: ConvBN - var maxPool: MaxPool2D - - var l2a = ResidualConvBlock(featureCounts: (64, 64, 64, 256), strides: (1, 1)) - var l2b = ResidualIdentityBlock(featureCounts: (256, 64, 64, 256)) - var l2c = ResidualIdentityBlock(featureCounts: (256, 64, 64, 256)) - - var l3a = ResidualConvBlock(featureCounts: (256, 128, 128, 512)) - var l3b = ResidualIdentityBlock(featureCounts: (512, 128, 128, 512)) - var l3c = ResidualIdentityBlock(featureCounts: (512, 128, 128, 512)) - var l3d = ResidualIdentityBlock(featureCounts: (512, 128, 128, 512)) - - var l4a = ResidualConvBlock(featureCounts: (512, 256, 256, 1024)) - var l4b = ResidualIdentityBlockStack(featureCounts: (1024, 256, 256, 1024)) - var l4c = ResidualIdentityBlockStack(featureCounts: (1024, 256, 256, 1024)) - var l4d = ResidualIdentityBlockStack(featureCounts: (1024, 256, 256, 1024)) - var l4e = ResidualIdentityBlockStack(featureCounts: (1024, 256, 256, 1024)) - var l4f = ResidualIdentityBlock(featureCounts: (1024, 256, 256, 1024)) - var l4g = ResidualIdentityBlock(featureCounts: (1024, 256, 256, 1024)) - - var l5a = ResidualConvBlock(featureCounts: (1024, 512, 512, 2048)) - var l5b = ResidualIdentityBlock(featureCounts: (2048, 512, 512, 2048)) - var l5c = ResidualIdentityBlock(featureCounts: (2048, 512, 512, 2048)) - - var avgPool: AvgPool2D - var flatten = Flatten() - var classifier: Dense - - init(imageSize: Int, classCount: Int) { - // default to the ImageNet case where imageSize == 224 - // Swift requires that all properties get initialized outside control flow - l1 = ConvBN(filterShape: (7, 7, 3, 64), strides: (2, 2), padding: .same) - maxPool = MaxPool2D(poolSize: (3, 3), strides: (2, 2)) - avgPool = AvgPool2D(poolSize: (7, 7), strides: (7, 7)) - if imageSize == 32 { - l1 = ConvBN(filterShape: (3, 3, 3, 64), padding: .same) - maxPool = MaxPool2D(poolSize: (1, 1), strides: (1, 1)) // no-op - avgPool = AvgPool2D(poolSize: (4, 4), strides: (4, 4)) - } - classifier = Dense(inputSize: 2048, outputSize: classCount) - } - - @differentiable - func call(_ input: Input) -> Output { - let inputLayer = maxPool(relu(l1(input))) - let level2 = inputLayer.sequenced(through: l2a, l2b, l2c) - let level3 = level2.sequenced(through: l3a, l3b, l3c, l3d) - let level4 = level3.sequenced(through: l4a, l4b, l4c, l4d, l4e, l4f) - let level5 = level4.sequenced(through: l4g, l5a, l5b, l5c) // l4g is here - return level5.sequenced(through: avgPool, flatten, classifier) - } -} - -struct ResNet152: Layer { - typealias Input = Tensor - typealias Output = Tensor - - var l1: ConvBN - var maxPool: MaxPool2D - - var l2a = ResidualConvBlock(featureCounts: (64, 64, 64, 256), strides: (1, 1)) - var l2b = ResidualIdentityBlock(featureCounts: (256, 64, 64, 256)) - var l2c = ResidualIdentityBlock(featureCounts: (256, 64, 64, 256)) - - var l3a = ResidualConvBlock(featureCounts: (256, 128, 128, 512)) - var l3b = ResidualIdentityBlockStack(featureCounts: (512, 128, 128, 512)) - var l3c = ResidualIdentityBlock(featureCounts: (512, 128, 128, 512)) - var l3d = ResidualIdentityBlock(featureCounts: (512, 128, 128, 512)) - - var l4a = ResidualConvBlock(featureCounts: (512, 256, 256, 1024)) - var l4b = ResidualIdentityBlockStack(featureCounts: (1024, 256, 
256, 1024)) - var l4c = ResidualIdentityBlockStack(featureCounts: (1024, 256, 256, 1024)) - var l4d = ResidualIdentityBlockStack(featureCounts: (1024, 256, 256, 1024)) - var l4e = ResidualIdentityBlockStack(featureCounts: (1024, 256, 256, 1024)) - var l4f = ResidualIdentityBlockStack(featureCounts: (1024, 256, 256, 1024)) - var l4g = ResidualIdentityBlockStack(featureCounts: (1024, 256, 256, 1024)) - var l4h = ResidualIdentityBlockStack(featureCounts: (1024, 256, 256, 1024)) - - var l5a = ResidualConvBlock(featureCounts: (1024, 512, 512, 2048)) - var l5b = ResidualIdentityBlock(featureCounts: (2048, 512, 512, 2048)) - var l5c = ResidualIdentityBlock(featureCounts: (2048, 512, 512, 2048)) - - var avgPool: AvgPool2D - var flatten = Flatten() - var classifier: Dense - - init(imageSize: Int, classCount: Int) { - // default to the ImageNet case where imageSize == 224 - // Swift requires that all properties get initialized outside control flow - l1 = ConvBN(filterShape: (7, 7, 3, 64), strides: (2, 2), padding: .same) - maxPool = MaxPool2D(poolSize: (3, 3), strides: (2, 2)) - avgPool = AvgPool2D(poolSize: (7, 7), strides: (7, 7)) - if imageSize == 32 { - l1 = ConvBN(filterShape: (3, 3, 3, 64), padding: .same) - maxPool = MaxPool2D(poolSize: (1, 1), strides: (1, 1)) // no-op - avgPool = AvgPool2D(poolSize: (4, 4), strides: (4, 4)) - } - classifier = Dense(inputSize: 2048, outputSize: classCount) - } - - @differentiable - func call(_ input: Input) -> Output { - let inputLayer = maxPool(relu(l1(input))) - let level2 = inputLayer.sequenced(through: l2a, l2b, l2c) - let level3 = level2.sequenced(through: l3a, l3b, l3c, l3d) - let level4 = level3.sequenced(through: l4a, l4b, l4c, l4d, l4e, l4f) - let level5 = level4.sequenced(through: l4g, l4h, l5a, l5b, l5c) // l4g, l4h are here - return level5.sequenced(through: avgPool, flatten, classifier) - } -} diff --git a/ResNet/main.swift b/ResNet/main.swift index ee00f3ba9e4..35204603685 100644 --- a/ResNet/main.swift +++ b/ResNet/main.swift @@ -21,9 +21,8 @@ let batchSize = 100 let cifarDataset = loadCIFAR10() let testBatches = cifarDataset.test.batched(batchSize) -// ResNet18, ResNet34, ResNet50, ResNet101, ResNet152 -// PreActivatedResNet18, PreActivatedResNet34 -var model = ResNet50(imageSize: 32, classCount: 10) // Use the network sized for CIFAR-10 +// Use the network sized for CIFAR-10 +var model = ResNet(kind: .resNet50, imageSize: 32, classCount: 10) // the classic ImageNet optimizer setting diverges on CIFAR-10 // let optimizer = SGD(for: model, learningRate: 0.1, momentum: 0.9) From a7c3047332330536c118c9b9e5ad0ff1aacd0c6e Mon Sep 17 00:00:00 2001 From: Brett Koonce Date: Mon, 13 May 2019 17:35:37 -0500 Subject: [PATCH 2/5] use InputType to determine filter sizes/class counts --- ResNet/ResNet.swift | 67 ++++++++++++++++++++++++--------------------- ResNet/main.swift | 2 +- 2 files changed, 37 insertions(+), 32 deletions(-) diff --git a/ResNet/ResNet.swift b/ResNet/ResNet.swift index ab56ab12daf..7adc674b487 100644 --- a/ResNet/ResNet.swift +++ b/ResNet/ResNet.swift @@ -20,6 +20,11 @@ import TensorFlow // https://arxiv.org/abs/1512.03385 // using shortcut layer to connect BasicBlock layers (aka Option (B)) +enum InputType { + case cifar + case imagenet +} + struct ConvBN: Layer { typealias Input = Tensor typealias Output = Tensor @@ -219,16 +224,18 @@ struct ResNetBasic: Layer { var flatten = Flatten() var classifier: Dense - init(imageSize: Int, classCount: Int, layerBlockCounts: (Int, Int, Int, Int)) { - // default to the ImageNet case where 
imageSize == 224 - // Swift requires that all properties get initialized outside control flow - l1 = ConvBN(filterShape: (7, 7, 3, 64), strides: (2, 2), padding: .same) - maxPool = MaxPool2D(poolSize: (3, 3), strides: (2, 2)) - avgPool = AvgPool2D(poolSize: (7, 7), strides: (7, 7)) - if imageSize == 32 { - l1 = ConvBN(filterShape: (3, 3, 3, 64), padding: .same) - maxPool = MaxPool2D(poolSize: (1, 1), strides: (1, 1)) // no-op - avgPool = AvgPool2D(poolSize: (4, 4), strides: (4, 4)) + init(input: InputType, layerBlockCounts: (Int, Int, Int, Int)) { + switch input { + case .imagenet: + l1 = ConvBN(filterShape: (7, 7, 3, 64), strides: (2, 2), padding: .same) + maxPool = MaxPool2D(poolSize: (3, 3), strides: (2, 2)) + avgPool = AvgPool2D(poolSize: (7, 7), strides: (7, 7)) + classifier = Dense(inputSize: 512, outputSize: 1000) + case .cifar: + l1 = ConvBN(filterShape: (3, 3, 3, 64), padding: .same) + maxPool = MaxPool2D(poolSize: (1, 1), strides: (1, 1)) // no-op + avgPool = AvgPool2D(poolSize: (4, 4), strides: (4, 4)) + classifier = Dense(inputSize: 512, outputSize: 10) } l2b = ResidualBasicBlockStack(featureCounts: (64, 64, 64, 64), @@ -239,8 +246,6 @@ struct ResNetBasic: Layer { blockCount: layerBlockCounts.2) l5b = ResidualBasicBlockStack(featureCounts: (512, 512, 512, 512), blockCount: layerBlockCounts.3) - - classifier = Dense(inputSize: 512, outputSize: classCount) } @differentiable @@ -260,12 +265,12 @@ extension ResNetBasic { case resNet34 } - init(kind: Kind, imageSize: Int, classCount: Int) { + init(kind: Kind, type: InputType) { switch kind { case .resNet18: - self.init(imageSize: imageSize, classCount: classCount, layerBlockCounts: (2, 2, 2, 2)) + self.init(input: type, layerBlockCounts: (2, 2, 2, 2)) case .resNet34: - self.init(imageSize: imageSize, classCount: classCount, layerBlockCounts: (3, 4, 6, 3)) + self.init(input: type, layerBlockCounts: (3, 4, 6, 3)) } } } @@ -293,16 +298,18 @@ struct ResNet: Layer { var flatten = Flatten() var classifier: Dense - init(imageSize: Int, classCount: Int, layerBlockCounts: (Int, Int, Int, Int)) { - // default to the ImageNet case where imageSize == 224 - // Swift requires that all properties get initialized outside control flow - l1 = ConvBN(filterShape: (7, 7, 3, 64), strides: (2, 2), padding: .same) - maxPool = MaxPool2D(poolSize: (3, 3), strides: (2, 2)) - avgPool = AvgPool2D(poolSize: (7, 7), strides: (7, 7)) - if imageSize == 32 { - l1 = ConvBN(filterShape: (3, 3, 3, 64), padding: .same) - maxPool = MaxPool2D(poolSize: (1, 1), strides: (1, 1)) // no-op - avgPool = AvgPool2D(poolSize: (4, 4), strides: (4, 4)) + init(input: InputType, layerBlockCounts: (Int, Int, Int, Int)) { + switch input { + case .imagenet: + l1 = ConvBN(filterShape: (7, 7, 3, 64), strides: (2, 2), padding: .same) + maxPool = MaxPool2D(poolSize: (3, 3), strides: (2, 2)) + avgPool = AvgPool2D(poolSize: (7, 7), strides: (7, 7)) + classifier = Dense(inputSize: 2048, outputSize: 1000) + case .cifar: + l1 = ConvBN(filterShape: (3, 3, 3, 64), padding: .same) + maxPool = MaxPool2D(poolSize: (1, 1), strides: (1, 1)) // no-op + avgPool = AvgPool2D(poolSize: (4, 4), strides: (4, 4)) + classifier = Dense(inputSize: 2048, outputSize: 10) } l2b = ResidualIdentityBlockStack(featureCounts: (256, 64, 64, 256), @@ -313,8 +320,6 @@ struct ResNet: Layer { blockCount: layerBlockCounts.2) l5b = ResidualIdentityBlockStack(featureCounts: (2048, 512, 512, 2048), blockCount: layerBlockCounts.3) - - classifier = Dense(inputSize: 2048, outputSize: classCount) } @differentiable @@ -335,14 +340,14 
@@ extension ResNet { case resNet152 } - init(kind: Kind, imageSize: Int, classCount: Int) { + init(kind: Kind, type: InputType) { switch kind { case .resNet50: - self.init(imageSize: imageSize, classCount: classCount, layerBlockCounts: (3, 4, 6, 3)) + self.init(input: type, layerBlockCounts: (3, 4, 6, 3)) case .resNet101: - self.init(imageSize: imageSize, classCount: classCount, layerBlockCounts: (3, 4, 23, 3)) + self.init(input: type, layerBlockCounts: (3, 4, 23, 3)) case .resNet152: - self.init(imageSize: imageSize, classCount: classCount, layerBlockCounts: (3, 8, 36, 3)) + self.init(input: type, layerBlockCounts: (3, 8, 36, 3)) } } } diff --git a/ResNet/main.swift b/ResNet/main.swift index 35204603685..8615543c951 100644 --- a/ResNet/main.swift +++ b/ResNet/main.swift @@ -22,7 +22,7 @@ let cifarDataset = loadCIFAR10() let testBatches = cifarDataset.test.batched(batchSize) // Use the network sized for CIFAR-10 -var model = ResNet(kind: .resNet50, imageSize: 32, classCount: 10) +var model = ResNet(kind: .resNet50, type: .cifar) // the classic ImageNet optimizer setting diverges on CIFAR-10 // let optimizer = SGD(for: model, learningRate: 0.1, momentum: 0.9) From 6ed2af6d62fa603099d11b63c80e372235c0649a Mon Sep 17 00:00:00 2001 From: Brett Koonce Date: Mon, 13 May 2019 18:58:07 -0500 Subject: [PATCH 3/5] naming/formatting tweaks --- ResNet/ResNet.swift | 50 ++++++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/ResNet/ResNet.swift b/ResNet/ResNet.swift index 7adc674b487..1ef92e269b1 100644 --- a/ResNet/ResNet.swift +++ b/ResNet/ResNet.swift @@ -20,7 +20,7 @@ import TensorFlow // https://arxiv.org/abs/1512.03385 // using shortcut layer to connect BasicBlock layers (aka Option (B)) -enum InputType { +enum InputKind { case cifar case imagenet } @@ -224,18 +224,18 @@ struct ResNetBasic: Layer { var flatten = Flatten() var classifier: Dense - init(input: InputType, layerBlockCounts: (Int, Int, Int, Int)) { + init(input: InputKind, layerBlockCounts: (Int, Int, Int, Int)) { switch input { - case .imagenet: - l1 = ConvBN(filterShape: (7, 7, 3, 64), strides: (2, 2), padding: .same) - maxPool = MaxPool2D(poolSize: (3, 3), strides: (2, 2)) - avgPool = AvgPool2D(poolSize: (7, 7), strides: (7, 7)) - classifier = Dense(inputSize: 512, outputSize: 1000) - case .cifar: - l1 = ConvBN(filterShape: (3, 3, 3, 64), padding: .same) - maxPool = MaxPool2D(poolSize: (1, 1), strides: (1, 1)) // no-op - avgPool = AvgPool2D(poolSize: (4, 4), strides: (4, 4)) - classifier = Dense(inputSize: 512, outputSize: 10) + case .imagenet: + l1 = ConvBN(filterShape: (7, 7, 3, 64), strides: (2, 2), padding: .same) + maxPool = MaxPool2D(poolSize: (3, 3), strides: (2, 2)) + avgPool = AvgPool2D(poolSize: (7, 7), strides: (7, 7)) + classifier = Dense(inputSize: 512, outputSize: 1000) + case .cifar: + l1 = ConvBN(filterShape: (3, 3, 3, 64), padding: .same) + maxPool = MaxPool2D(poolSize: (1, 1), strides: (1, 1)) // no-op + avgPool = AvgPool2D(poolSize: (4, 4), strides: (4, 4)) + classifier = Dense(inputSize: 512, outputSize: 10) } l2b = ResidualBasicBlockStack(featureCounts: (64, 64, 64, 64), @@ -265,7 +265,7 @@ extension ResNetBasic { case resNet34 } - init(kind: Kind, type: InputType) { + init(kind: Kind, type: InputKind) { switch kind { case .resNet18: self.init(input: type, layerBlockCounts: (2, 2, 2, 2)) @@ -298,18 +298,18 @@ struct ResNet: Layer { var flatten = Flatten() var classifier: Dense - init(input: InputType, layerBlockCounts: (Int, Int, Int, Int)) { + init(input: 
InputKind, layerBlockCounts: (Int, Int, Int, Int)) { switch input { - case .imagenet: - l1 = ConvBN(filterShape: (7, 7, 3, 64), strides: (2, 2), padding: .same) - maxPool = MaxPool2D(poolSize: (3, 3), strides: (2, 2)) - avgPool = AvgPool2D(poolSize: (7, 7), strides: (7, 7)) - classifier = Dense(inputSize: 2048, outputSize: 1000) - case .cifar: - l1 = ConvBN(filterShape: (3, 3, 3, 64), padding: .same) - maxPool = MaxPool2D(poolSize: (1, 1), strides: (1, 1)) // no-op - avgPool = AvgPool2D(poolSize: (4, 4), strides: (4, 4)) - classifier = Dense(inputSize: 2048, outputSize: 10) + case .imagenet: + l1 = ConvBN(filterShape: (7, 7, 3, 64), strides: (2, 2), padding: .same) + maxPool = MaxPool2D(poolSize: (3, 3), strides: (2, 2)) + avgPool = AvgPool2D(poolSize: (7, 7), strides: (7, 7)) + classifier = Dense(inputSize: 2048, outputSize: 1000) + case .cifar: + l1 = ConvBN(filterShape: (3, 3, 3, 64), padding: .same) + maxPool = MaxPool2D(poolSize: (1, 1), strides: (1, 1)) // no-op + avgPool = AvgPool2D(poolSize: (4, 4), strides: (4, 4)) + classifier = Dense(inputSize: 2048, outputSize: 10) } l2b = ResidualIdentityBlockStack(featureCounts: (256, 64, 64, 256), @@ -340,7 +340,7 @@ extension ResNet { case resNet152 } - init(kind: Kind, type: InputType) { + init(kind: Kind, type: InputKind) { switch kind { case .resNet50: self.init(input: type, layerBlockCounts: (3, 4, 6, 3)) From cd2f9110ce3487300bda7c100d1803303ac2bf71 Mon Sep 17 00:00:00 2001 From: Brett Koonce Date: Mon, 13 May 2019 20:26:15 -0500 Subject: [PATCH 4/5] clarify enum names --- ResNet/ResNet.swift | 28 ++++++++++++++-------------- ResNet/main.swift | 2 +- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/ResNet/ResNet.swift b/ResNet/ResNet.swift index 1ef92e269b1..53751ca291d 100644 --- a/ResNet/ResNet.swift +++ b/ResNet/ResNet.swift @@ -20,7 +20,7 @@ import TensorFlow // https://arxiv.org/abs/1512.03385 // using shortcut layer to connect BasicBlock layers (aka Option (B)) -enum InputKind { +enum DataKind { case cifar case imagenet } @@ -224,8 +224,8 @@ struct ResNetBasic: Layer { var flatten = Flatten() var classifier: Dense - init(input: InputKind, layerBlockCounts: (Int, Int, Int, Int)) { - switch input { + init(dataKind: DataKind, layerBlockCounts: (Int, Int, Int, Int)) { + switch dataKind { case .imagenet: l1 = ConvBN(filterShape: (7, 7, 3, 64), strides: (2, 2), padding: .same) maxPool = MaxPool2D(poolSize: (3, 3), strides: (2, 2)) @@ -265,12 +265,12 @@ extension ResNetBasic { case resNet34 } - init(kind: Kind, type: InputKind) { - switch kind { + init(inputKind: Kind, data: DataKind) { + switch inputKind { case .resNet18: - self.init(input: type, layerBlockCounts: (2, 2, 2, 2)) + self.init(dataKind: data, layerBlockCounts: (2, 2, 2, 2)) case .resNet34: - self.init(input: type, layerBlockCounts: (3, 4, 6, 3)) + self.init(dataKind: data, layerBlockCounts: (3, 4, 6, 3)) } } } @@ -298,8 +298,8 @@ struct ResNet: Layer { var flatten = Flatten() var classifier: Dense - init(input: InputKind, layerBlockCounts: (Int, Int, Int, Int)) { - switch input { + init(dataKind: DataKind, layerBlockCounts: (Int, Int, Int, Int)) { + switch dataKind { case .imagenet: l1 = ConvBN(filterShape: (7, 7, 3, 64), strides: (2, 2), padding: .same) maxPool = MaxPool2D(poolSize: (3, 3), strides: (2, 2)) @@ -340,14 +340,14 @@ extension ResNet { case resNet152 } - init(kind: Kind, type: InputKind) { - switch kind { + init(inputKind: Kind, data: DataKind) { + switch inputKind { case .resNet50: - self.init(input: type, layerBlockCounts: (3, 4, 6, 3)) + 
self.init(dataKind: data, layerBlockCounts: (3, 4, 6, 3)) case .resNet101: - self.init(input: type, layerBlockCounts: (3, 4, 23, 3)) + self.init(dataKind: data, layerBlockCounts: (3, 4, 23, 3)) case .resNet152: - self.init(input: type, layerBlockCounts: (3, 8, 36, 3)) + self.init(dataKind: data, layerBlockCounts: (3, 8, 36, 3)) } } } diff --git a/ResNet/main.swift b/ResNet/main.swift index 8615543c951..3e9c6026aa2 100644 --- a/ResNet/main.swift +++ b/ResNet/main.swift @@ -22,7 +22,7 @@ let cifarDataset = loadCIFAR10() let testBatches = cifarDataset.test.batched(batchSize) // Use the network sized for CIFAR-10 -var model = ResNet(kind: .resNet50, type: .cifar) +var model = ResNet(inputKind: .resNet50, data: .cifar) // the classic ImageNet optimizer setting diverges on CIFAR-10 // let optimizer = SGD(for: model, learningRate: 0.1, momentum: 0.9) From bfece34e5e891aca645740453d5f7b1a033488a6 Mon Sep 17 00:00:00 2001 From: Brett Koonce Date: Mon, 13 May 2019 20:29:57 -0500 Subject: [PATCH 5/5] match input naming scheme --- ResNet/ResNet.swift | 14 +++++++------- ResNet/main.swift | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/ResNet/ResNet.swift b/ResNet/ResNet.swift index 53751ca291d..9e25fa4c121 100644 --- a/ResNet/ResNet.swift +++ b/ResNet/ResNet.swift @@ -265,12 +265,12 @@ extension ResNetBasic { case resNet34 } - init(inputKind: Kind, data: DataKind) { + init(inputKind: Kind, dataKind: DataKind) { switch inputKind { case .resNet18: - self.init(dataKind: data, layerBlockCounts: (2, 2, 2, 2)) + self.init(dataKind: dataKind, layerBlockCounts: (2, 2, 2, 2)) case .resNet34: - self.init(dataKind: data, layerBlockCounts: (3, 4, 6, 3)) + self.init(dataKind: dataKind, layerBlockCounts: (3, 4, 6, 3)) } } } @@ -340,14 +340,14 @@ extension ResNet { case resNet152 } - init(inputKind: Kind, data: DataKind) { + init(inputKind: Kind, dataKind: DataKind) { switch inputKind { case .resNet50: - self.init(dataKind: data, layerBlockCounts: (3, 4, 6, 3)) + self.init(dataKind: dataKind, layerBlockCounts: (3, 4, 6, 3)) case .resNet101: - self.init(dataKind: data, layerBlockCounts: (3, 4, 23, 3)) + self.init(dataKind: dataKind, layerBlockCounts: (3, 4, 23, 3)) case .resNet152: - self.init(dataKind: data, layerBlockCounts: (3, 8, 36, 3)) + self.init(dataKind: dataKind, layerBlockCounts: (3, 8, 36, 3)) } } } diff --git a/ResNet/main.swift b/ResNet/main.swift index 3e9c6026aa2..48bf7d4c844 100644 --- a/ResNet/main.swift +++ b/ResNet/main.swift @@ -22,7 +22,7 @@ let cifarDataset = loadCIFAR10() let testBatches = cifarDataset.test.batched(batchSize) // Use the network sized for CIFAR-10 -var model = ResNet(inputKind: .resNet50, data: .cifar) +var model = ResNet(inputKind: .resNet50, dataKind: .cifar) // the classic ImageNet optimizer setting diverges on CIFAR-10 // let optimizer = SGD(for: model, learningRate: 0.1, momentum: 0.9)
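
For reference, a minimal usage sketch assuming the whole series applies cleanly. The CIFAR-10 construction mirrors the final ResNet/main.swift above; the ResNetBasic line, the ImageNet line, and the dummy forward pass are illustrative assumptions, not part of the diff.

import TensorFlow

// CIFAR-10 bottleneck network, as constructed in ResNet/main.swift after patch 5.
var cifarModel = ResNet(inputKind: .resNet50, dataKind: .cifar)

// Assumed usage: the basic-block variants expose the same two-argument initializer.
var basicModel = ResNetBasic(inputKind: .resNet18, dataKind: .cifar)

// Assumed usage: ImageNet configuration (7x7 stride-2 stem, 3x3 max pool, 1000-way classifier).
var imagenetModel = ResNet(inputKind: .resNet152, dataKind: .imagenet)

// Sanity-check forward pass on a CIFAR-sized batch; the logits shape should be [1, 10].
let logits = cifarModel(Tensor<Float>(zeros: [1, 32, 32, 3]))
print(logits.shape)

Selecting the stem, pooling, and classifier through a single DataKind value is the design patches 2-5 converge on, replacing the earlier imageSize == 32 special-casing and the per-call classCount argument.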