From 2f325d93ca44b12df40f37d75e10fe0818357ea1 Mon Sep 17 00:00:00 2001
From: Emanuel Zephir <emanuel@emanuelzephir.com>
Date: Thu, 31 Dec 2015 14:27:42 -0800
Subject: [PATCH 01/12] [SILOptimizer] Refactor ObjC dependencies in
 sil_combine tests

Move functionality that requires ObjC interop into a new file and remove the
XFAIL on Linux. Partially resolves SR-216.
---
 test/SILOptimizer/sil_combine.sil      | 138 -----------------------
 test/SILOptimizer/sil_combine_objc.sil | 150 +++++++++++++++++++++++++
 2 files changed, 150 insertions(+), 138 deletions(-)
 create mode 100644 test/SILOptimizer/sil_combine_objc.sil

diff --git a/test/SILOptimizer/sil_combine.sil b/test/SILOptimizer/sil_combine.sil
index 628d796e6f484..4774731ea71e5 100644
--- a/test/SILOptimizer/sil_combine.sil
+++ b/test/SILOptimizer/sil_combine.sil
@@ -1,7 +1,5 @@
 // RUN: %target-sil-opt -enable-sil-verify-all %s -sil-combine -verify-skip-unreachable-must-be-last | FileCheck %s
 
-// XFAIL: linux
-
 sil_stage canonical
 
 import Builtin
@@ -694,48 +692,6 @@ bb0(%0 : $*X):
   return %19 : $()
 }
 
-sil @stringcore_invariant_check : $@convention(thin) (@owned _StringCore) -> @owned Optional<_CocoaStringType>
-sil @reabstraction_thunk : $@convention(thin) (@out Optional<_CocoaStringType>, @owned @callee_owned () -> @owned Optional<_CocoaStringType>) -> ()
-
-// CHECK-LABEL: sil @dead_closure_elimination : $@convention(thin) (@owned _StringCore) -> ()
-// CHECK: bb0
-// CHECK-NEXT: release_value
-// CHECK-NEXT: tuple
-// CHECK-NEXT: return
-sil @dead_closure_elimination : $@convention(thin) (@owned _StringCore) -> () {
-bb0(%0 : $_StringCore):
-  %1 = function_ref @stringcore_invariant_check : $@convention(thin) (@owned _StringCore) -> @owned Optional<_CocoaStringType>
-  %2 = partial_apply %1(%0) : $@convention(thin) (@owned _StringCore) -> @owned Optional<_CocoaStringType>
-  %3 = function_ref @reabstraction_thunk : $@convention(thin) (@out Optional<_CocoaStringType>, @owned @callee_owned () -> @owned Optional<_CocoaStringType>) -> ()
-  %4 = partial_apply %3(%2) : $@convention(thin) (@out Optional<_CocoaStringType>, @owned @callee_owned () -> @owned Optional<_CocoaStringType>) -> ()
-  strong_release %4 : $@callee_owned (@out Optional<_CocoaStringType>) -> ()
-  %5 = tuple()
-  return %5 : $()
-}
-
-// CHECK-LABEL: sil @dead_closure_elimination2
-// CHECK:      bb0
-// CHECK-NEXT:   br bb1
-// CHECK:      bb1
-// CHECK-NEXT:   release_value
-// CHECK-NEXT:   tuple
-// CHECK-NEXT:   return
-sil @dead_closure_elimination2 : $@convention(thin) (@owned _StringCore) -> () {
-bb0(%0 : $_StringCore):
-  %1 = function_ref @stringcore_invariant_check : $@convention(thin) (@owned _StringCore) -> @owned Optional<_CocoaStringType>
-  %2 = partial_apply %1(%0) : $@convention(thin) (@owned _StringCore) -> @owned Optional<_CocoaStringType>
-  %3 = function_ref @reabstraction_thunk : $@convention(thin) (@out Optional<_CocoaStringType>, @owned @callee_owned () -> @owned Optional<_CocoaStringType>) -> ()
-  %4 = partial_apply %3(%2) : $@convention(thin) (@out Optional<_CocoaStringType>, @owned @callee_owned () -> @owned Optional<_CocoaStringType>) -> ()
-  br bb1
-
-bb1:
-  strong_retain %4 : $@callee_owned (@out Optional<_CocoaStringType>) -> ()
-  strong_release %4 : $@callee_owned (@out Optional<_CocoaStringType>) -> ()
-  strong_release %4 : $@callee_owned (@out Optional<_CocoaStringType>) -> ()
-  %5 = tuple()
-  return %5 : $()
-}
-
 sil @unbalanced_closure : $@convention(thin) (@guaranteed B) -> ()
 
 // CHECK-LABEL: sil @partial_apply_unbalanced_retain_release
@@ -1855,32 +1811,6 @@ bb3(%a : $ZZZ):
   return %a : $ZZZ
 }
 
-
-// FIXME: <rdar://problem/20980377> Add dead array elimination to DeadObjectElimination
-// CHECK-LABEL: test_dead_array
-// CHECK: bb0(%0 : $ZZZ):
-// DISABLED-CHECK-NEXT: strong_release %0
-// DISABLED-CHECK-NEXT: tuple
-// DISABLED-CHECK-NEXT: return
-sil @test_dead_array : $@convention(thin) (@owned ZZZ) -> () {
-bb0(%0 : $ZZZ):
-  %1 = integer_literal $Builtin.Word, 1
-  %2 = function_ref @_allocate_uninitialized_ZZZ : $@convention(thin) (Builtin.Word) -> @owned (Array<ZZZ>, Builtin.RawPointer)
-  %3 = apply %2(%1) : $@convention(thin) (Builtin.Word) -> @owned (Array<ZZZ>, Builtin.RawPointer)
-  %4 = tuple_extract %3 : $(Array<ZZZ>, Builtin.RawPointer), 0
-  %5 = tuple_extract %3 : $(Array<ZZZ>, Builtin.RawPointer), 1
-  %6 = pointer_to_address %5 : $Builtin.RawPointer to $*ZZZ
-  store %0 to %6 : $*ZZZ
-  %8 = struct_extract %4 : $Array<ZZZ>, #Array._buffer
-  %9 = struct_extract %8 : $_ArrayBuffer<ZZZ>, #_ArrayBuffer._storage
-  %10 = struct_extract %9 : $_BridgeStorage<_ContiguousArrayStorageBase, _NSArrayCoreType>, #_BridgeStorage.rawValue
-  strong_release %10 : $Builtin.BridgeObject
-  %12 = tuple ()
-  return %12 : $()
-}
-
-sil [_semantics "array.uninitialized"] @_allocate_uninitialized_ZZZ : $@convention(thin) (Builtin.Word) -> @owned (Array<ZZZ>, Builtin.RawPointer)
-
 struct FakeInt16 {
   var val : Builtin.Int16
 }
@@ -2602,55 +2532,6 @@ bb0(%0 : $Builtin.Int1):
   return %2 : $Builtin.Int1
 }
 
-// dead_array test helpers
-sil [thunk] @dead_array_run_closure : $@convention(thin) (@owned @callee_owned () -> Bool) -> () {
-bb0(%0 : $@callee_owned () -> Bool):
-  %1 = apply %0() : $@callee_owned () -> Bool
-  %2 = tuple ()
-  return %2 : $()
-}
-
-sil @dead_array_closure : $@convention(thin) (@inout _HeapBuffer<C, Int>) -> Bool {
-bb0(%0 : $*_HeapBuffer<C, Int>):
-  %1 = struct_element_addr %0 : $*_HeapBuffer<C, Int>, #_HeapBuffer._storage // user: %2
-  %2 = is_unique %1 : $*Optional<Builtin.NativeObject> // user: %3
-  %3 = struct $Bool (%2 : $Builtin.Int1)          // user: %4
-  return %3 : $Bool                               // id: %4
-}
-
-// Mimicks Swift._allocateUninitializedArray
-sil [_semantics "array.uninitialized"] @dead_array_alloc : $@convention(thin) <τ_0_0> (Builtin.Word) -> @owned (Array<τ_0_0>, Builtin.RawPointer)
-
-// <rdar://problem/20980377> HeapBuffer.swift test case spuriously reports a "unique" buffer
-// CHECK-LABEL: sil @dead_array
-// CHECK-NOT: release
-// CHECK: retain_value %{{[0-9]+}} : $Optional<Builtin.NativeObject>
-// CHECK: apply
-// CHECK: strong_release %{{[0-9]+}} : $Builtin.BridgeObject
-sil @dead_array : $@convention(thin) (@inout _HeapBuffer<C, Int>) -> () {
-bb0(%0 : $*_HeapBuffer<C, Int>):
-  %1 = integer_literal $Builtin.Word, 1           // user: %3
-  %2 = function_ref @dead_array_alloc : $@convention(thin) <τ_0_0> (Builtin.Word) -> @owned (Array<τ_0_0>, Builtin.RawPointer)
-  %3 = apply %2<_HeapBuffer<C, Int>>(%1) : $@convention(thin) <τ_0_0> (Builtin.Word) -> @owned (Array<τ_0_0>, Builtin.RawPointer)
-  %4 = tuple_extract %3 : $(Array<_HeapBuffer<C, Int>>, Builtin.RawPointer), 0 // user: %15
-  %5 = tuple_extract %3 : $(Array<_HeapBuffer<C, Int>>, Builtin.RawPointer), 1 // user: %6
-  %6 = pointer_to_address %5 : $Builtin.RawPointer to $*_HeapBuffer<C, Int> // user: %9
-  %7 = load %0 : $*_HeapBuffer<C, Int>            // users: %8, %9
-  %8 = struct_extract %7 : $_HeapBuffer<C, Int>, #_HeapBuffer._storage // user: %13
-  store %7 to %6 : $*_HeapBuffer<C, Int>          // id: %9
-  %10 = function_ref @dead_array_run_closure : $@convention(thin) (@owned @callee_owned () -> Bool) -> () // user: %14
-  %11 = function_ref @dead_array_closure : $@convention(thin) (@inout _HeapBuffer<C, Int>) -> Bool // user: %12
-  %12 = partial_apply %11(%0) : $@convention(thin) (@inout _HeapBuffer<C, Int>) -> Bool // user: %14
-  retain_value %8 : $Optional<Builtin.NativeObject> // id: %13
-  %14 = apply %10(%12) : $@convention(thin) (@owned @callee_owned () -> Bool) -> ()
-  %15 = struct_extract %4 : $Array<_HeapBuffer<C, Int>>, #Array._buffer // user: %16
-  %16 = struct_extract %15 : $_ArrayBuffer<_HeapBuffer<C, Int>>, #_ArrayBuffer._storage // user: %17
-  %17 = struct_extract %16 : $_BridgeStorage<_ContiguousArrayStorageBase, _NSArrayCoreType>, #_BridgeStorage.rawValue // user: %18
-  strong_release %17 : $Builtin.BridgeObject      // id: %18
-  %19 = tuple ()                                  // user: %20
-  return %19 : $()                                // id: %20
-}
-
 struct NStruct {
 	var a:Int
 	var b:Int
@@ -2792,25 +2673,6 @@ bb0:
   return %2 : $Builtin.Int1
 }
 
-// Check that it does not crash the compiler.
-// Int is ObjC-bridgeable in this case, but its conformance is not know, 
-// because Foundation is not imported yet.
-// Therefore the cast may succeed from the compiler point of view.
-// CHECK-LABEL: sil @cast_of_class_to_int
-// CHECK: unconditional_checked_cast_addr
-// CHECK: return
-sil @cast_of_class_to_int : $@convention(thin) (C) -> Int {
-bb0(%0 : $C):
-  %1 = alloc_stack $Int
-  %2 = alloc_stack $C
-  store %0 to %2#1 : $*C
-  unconditional_checked_cast_addr take_always C in %2#1 : $*C to Int in %1#1 : $*Int 
-  %4 = load %1#1 : $*Int
-  dealloc_stack %2#0 : $*@local_storage C
-  dealloc_stack %1#0 : $*@local_storage Int
-  return %4 : $Int
-}
-
 class CC1 {
    deinit 
   init()
diff --git a/test/SILOptimizer/sil_combine_objc.sil b/test/SILOptimizer/sil_combine_objc.sil
new file mode 100644
index 0000000000000..dd441c7c131b5
--- /dev/null
+++ b/test/SILOptimizer/sil_combine_objc.sil
@@ -0,0 +1,150 @@
+// RUN: %target-sil-opt -enable-sil-verify-all %s -sil-combine -verify-skip-unreachable-must-be-last | FileCheck %s
+// REQUIRES: objc_interop
+
+sil_stage canonical
+
+import Builtin
+import Swift
+
+class ZZZ {
+  @objc deinit 
+  init()
+}
+
+class C {}
+
+sil @stringcore_invariant_check : $@convention(thin) (@owned _StringCore) -> @owned Optional<_CocoaStringType>
+sil @reabstraction_thunk : $@convention(thin) (@out Optional<_CocoaStringType>, @owned @callee_owned () -> @owned Optional<_CocoaStringType>) -> ()
+
+// CHECK-LABEL: sil @dead_closure_elimination : $@convention(thin) (@owned _StringCore) -> ()
+// CHECK: bb0
+// CHECK-NEXT: release_value
+// CHECK-NEXT: tuple
+// CHECK-NEXT: return
+sil @dead_closure_elimination : $@convention(thin) (@owned _StringCore) -> () {
+bb0(%0 : $_StringCore):
+  %1 = function_ref @stringcore_invariant_check : $@convention(thin) (@owned _StringCore) -> @owned Optional<_CocoaStringType>
+  %2 = partial_apply %1(%0) : $@convention(thin) (@owned _StringCore) -> @owned Optional<_CocoaStringType>
+  %3 = function_ref @reabstraction_thunk : $@convention(thin) (@out Optional<_CocoaStringType>, @owned @callee_owned () -> @owned Optional<_CocoaStringType>) -> ()
+  %4 = partial_apply %3(%2) : $@convention(thin) (@out Optional<_CocoaStringType>, @owned @callee_owned () -> @owned Optional<_CocoaStringType>) -> ()
+  strong_release %4 : $@callee_owned (@out Optional<_CocoaStringType>) -> ()
+  %5 = tuple()
+  return %5 : $()
+}
+
+// CHECK-LABEL: sil @dead_closure_elimination2
+// CHECK:      bb0
+// CHECK-NEXT:   br bb1
+// CHECK:      bb1
+// CHECK-NEXT:   release_value
+// CHECK-NEXT:   tuple
+// CHECK-NEXT:   return
+sil @dead_closure_elimination2 : $@convention(thin) (@owned _StringCore) -> () {
+bb0(%0 : $_StringCore):
+  %1 = function_ref @stringcore_invariant_check : $@convention(thin) (@owned _StringCore) -> @owned Optional<_CocoaStringType>
+  %2 = partial_apply %1(%0) : $@convention(thin) (@owned _StringCore) -> @owned Optional<_CocoaStringType>
+  %3 = function_ref @reabstraction_thunk : $@convention(thin) (@out Optional<_CocoaStringType>, @owned @callee_owned () -> @owned Optional<_CocoaStringType>) -> ()
+  %4 = partial_apply %3(%2) : $@convention(thin) (@out Optional<_CocoaStringType>, @owned @callee_owned () -> @owned Optional<_CocoaStringType>) -> ()
+  br bb1
+
+bb1:
+  strong_retain %4 : $@callee_owned (@out Optional<_CocoaStringType>) -> ()
+  strong_release %4 : $@callee_owned (@out Optional<_CocoaStringType>) -> ()
+  strong_release %4 : $@callee_owned (@out Optional<_CocoaStringType>) -> ()
+  %5 = tuple()
+  return %5 : $()
+}
+
+// FIXME: <rdar://problem/20980377> Add dead array elimination to DeadObjectElimination
+// CHECK-LABEL: test_dead_array
+// CHECK: bb0(%0 : $ZZZ):
+// DISABLED-CHECK-NEXT: strong_release %0
+// DISABLED-CHECK-NEXT: tuple
+// DISABLED-CHECK-NEXT: return
+sil @test_dead_array : $@convention(thin) (@owned ZZZ) -> () {
+bb0(%0 : $ZZZ):
+  %1 = integer_literal $Builtin.Word, 1
+  %2 = function_ref @_allocate_uninitialized_ZZZ : $@convention(thin) (Builtin.Word) -> @owned (Array<ZZZ>, Builtin.RawPointer)
+  %3 = apply %2(%1) : $@convention(thin) (Builtin.Word) -> @owned (Array<ZZZ>, Builtin.RawPointer)
+  %4 = tuple_extract %3 : $(Array<ZZZ>, Builtin.RawPointer), 0
+  %5 = tuple_extract %3 : $(Array<ZZZ>, Builtin.RawPointer), 1
+  %6 = pointer_to_address %5 : $Builtin.RawPointer to $*ZZZ
+  store %0 to %6 : $*ZZZ
+  %8 = struct_extract %4 : $Array<ZZZ>, #Array._buffer
+  %9 = struct_extract %8 : $_ArrayBuffer<ZZZ>, #_ArrayBuffer._storage
+  %10 = struct_extract %9 : $_BridgeStorage<_ContiguousArrayStorageBase, _NSArrayCoreType>, #_BridgeStorage.rawValue
+  strong_release %10 : $Builtin.BridgeObject
+  %12 = tuple ()
+  return %12 : $()
+}
+
+sil [_semantics "array.uninitialized"] @_allocate_uninitialized_ZZZ : $@convention(thin) (Builtin.Word) -> @owned (Array<ZZZ>, Builtin.RawPointer)
+
+// dead_array test helpers
+sil [thunk] @dead_array_run_closure : $@convention(thin) (@owned @callee_owned () -> Bool) -> () {
+bb0(%0 : $@callee_owned () -> Bool):
+  %1 = apply %0() : $@callee_owned () -> Bool
+  %2 = tuple ()
+  return %2 : $()
+}
+
+sil @dead_array_closure : $@convention(thin) (@inout _HeapBuffer<C, Int>) -> Bool {
+bb0(%0 : $*_HeapBuffer<C, Int>):
+  %1 = struct_element_addr %0 : $*_HeapBuffer<C, Int>, #_HeapBuffer._storage // user: %2
+  %2 = is_unique %1 : $*Optional<Builtin.NativeObject> // user: %3
+  %3 = struct $Bool (%2 : $Builtin.Int1)          // user: %4
+  return %3 : $Bool                               // id: %4
+}
+
+// Mimicks Swift._allocateUninitializedArray
+sil [_semantics "array.uninitialized"] @dead_array_alloc : $@convention(thin) <τ_0_0> (Builtin.Word) -> @owned (Array<τ_0_0>, Builtin.RawPointer)
+
+// <rdar://problem/20980377> HeapBuffer.swift test case spuriously reports a "unique" buffer
+// CHECK-LABEL: sil @dead_array
+// CHECK-NOT: release
+// CHECK: retain_value %{{[0-9]+}} : $Optional<Builtin.NativeObject>
+// CHECK: apply
+// CHECK: strong_release %{{[0-9]+}} : $Builtin.BridgeObject
+sil @dead_array : $@convention(thin) (@inout _HeapBuffer<C, Int>) -> () {
+bb0(%0 : $*_HeapBuffer<C, Int>):
+  %1 = integer_literal $Builtin.Word, 1           // user: %3
+  %2 = function_ref @dead_array_alloc : $@convention(thin) <τ_0_0> (Builtin.Word) -> @owned (Array<τ_0_0>, Builtin.RawPointer)
+  %3 = apply %2<_HeapBuffer<C, Int>>(%1) : $@convention(thin) <τ_0_0> (Builtin.Word) -> @owned (Array<τ_0_0>, Builtin.RawPointer)
+  %4 = tuple_extract %3 : $(Array<_HeapBuffer<C, Int>>, Builtin.RawPointer), 0 // user: %15
+  %5 = tuple_extract %3 : $(Array<_HeapBuffer<C, Int>>, Builtin.RawPointer), 1 // user: %6
+  %6 = pointer_to_address %5 : $Builtin.RawPointer to $*_HeapBuffer<C, Int> // user: %9
+  %7 = load %0 : $*_HeapBuffer<C, Int>            // users: %8, %9
+  %8 = struct_extract %7 : $_HeapBuffer<C, Int>, #_HeapBuffer._storage // user: %13
+  store %7 to %6 : $*_HeapBuffer<C, Int>          // id: %9
+  %10 = function_ref @dead_array_run_closure : $@convention(thin) (@owned @callee_owned () -> Bool) -> () // user: %14
+  %11 = function_ref @dead_array_closure : $@convention(thin) (@inout _HeapBuffer<C, Int>) -> Bool // user: %12
+  %12 = partial_apply %11(%0) : $@convention(thin) (@inout _HeapBuffer<C, Int>) -> Bool // user: %14
+  retain_value %8 : $Optional<Builtin.NativeObject> // id: %13
+  %14 = apply %10(%12) : $@convention(thin) (@owned @callee_owned () -> Bool) -> ()
+  %15 = struct_extract %4 : $Array<_HeapBuffer<C, Int>>, #Array._buffer // user: %16
+  %16 = struct_extract %15 : $_ArrayBuffer<_HeapBuffer<C, Int>>, #_ArrayBuffer._storage // user: %17
+  %17 = struct_extract %16 : $_BridgeStorage<_ContiguousArrayStorageBase, _NSArrayCoreType>, #_BridgeStorage.rawValue // user: %18
+  strong_release %17 : $Builtin.BridgeObject      // id: %18
+  %19 = tuple ()                                  // user: %20
+  return %19 : $()                                // id: %20
+}
+
+// Check that it does not crash the compiler.
+// Int is ObjC-bridgeable in this case, but its conformance is not know, 
+// because Foundation is not imported yet.
+// Therefore the cast may succeed from the compiler point of view.
+// CHECK-LABEL: sil @cast_of_class_to_int
+// CHECK: unconditional_checked_cast_addr
+// CHECK: return
+sil @cast_of_class_to_int : $@convention(thin) (C) -> Int {
+bb0(%0 : $C):
+  %1 = alloc_stack $Int
+  %2 = alloc_stack $C
+  store %0 to %2#1 : $*C
+  unconditional_checked_cast_addr take_always C in %2#1 : $*C to Int in %1#1 : $*Int 
+  %4 = load %1#1 : $*Int
+  dealloc_stack %2#0 : $*@local_storage C
+  dealloc_stack %1#0 : $*@local_storage Int
+  return %4 : $Int
+}
+

From 9e2e24b7d81948102bc5c3bf5a41309bec02ab23 Mon Sep 17 00:00:00 2001
From: Erik Eckstein <eeckstein@apple.com>
Date: Mon, 4 Jan 2016 09:31:01 -0800
Subject: [PATCH 02/12] SILCombine: add debug message. NFC.

---
 lib/SILOptimizer/SILCombiner/SILCombine.cpp | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/lib/SILOptimizer/SILCombiner/SILCombine.cpp b/lib/SILOptimizer/SILCombiner/SILCombine.cpp
index d2ec785a4052c..35c78b6bda2b6 100644
--- a/lib/SILOptimizer/SILCombiner/SILCombine.cpp
+++ b/lib/SILOptimizer/SILCombiner/SILCombine.cpp
@@ -314,10 +314,15 @@ SILInstruction *SILCombiner::eraseInstFromFunction(SILInstruction &I,
   assert(hasNoUsesExceptDebug(&I) && "Cannot erase instruction that is used!");
   // Make sure that we reprocess all operands now that we reduced their
   // use counts.
-  if (I.getNumOperands() < 8 && AddOperandsToWorklist)
-    for (auto &OpI : I.getAllOperands())
-      if (SILInstruction *Op = llvm::dyn_cast<SILInstruction>(&*OpI.get()))
+  if (I.getNumOperands() < 8 && AddOperandsToWorklist) {
+    for (auto &OpI : I.getAllOperands()) {
+      if (SILInstruction *Op = llvm::dyn_cast<SILInstruction>(&*OpI.get())) {
+        DEBUG(llvm::dbgs() << "SC: add op " << *Op <<
+              " from erased inst to worklist\n");
         Worklist.add(Op);
+      }
+    }
+  }
 
   for (Operand *DU : getDebugUses(I))
     Worklist.remove(DU->getUser());

From f1f4c69476436a2b0bad6155455bee007df29f69 Mon Sep 17 00:00:00 2001
From: Erik Eckstein <eeckstein@apple.com>
Date: Mon, 4 Jan 2016 09:39:58 -0800
Subject: [PATCH 03/12] SILCombine: fix non-deterministic compilation

This is another bug exposed by changing the instruction allocation.
Depending on the allocated address of new SILBuilder instructions, those instructions were added to the SILCombiner worklist or not.
This bug didn't cause any crashes or miscombiles, but resulted in a non-deterministic result of SILCombine.
---
 lib/SILOptimizer/SILCombiner/SILCombine.cpp | 23 +++++++++++++++------
 lib/SILOptimizer/SILCombiner/SILCombiner.h  |  6 +-----
 2 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/lib/SILOptimizer/SILCombiner/SILCombine.cpp b/lib/SILOptimizer/SILCombiner/SILCombine.cpp
index 35c78b6bda2b6..eb179341021b0 100644
--- a/lib/SILOptimizer/SILCombiner/SILCombine.cpp
+++ b/lib/SILOptimizer/SILCombiner/SILCombine.cpp
@@ -211,11 +211,11 @@ bool SILCombiner::doOneIteration(SILFunction &F, unsigned Iteration) {
     // the next iteration.
     auto &TrackingList = *Builder.getTrackingList();
     for (SILInstruction *I : TrackingList) {
-      if (!DeletedInstSet.count(I))
-        Worklist.add(I);
+      DEBUG(llvm::dbgs() << "SC: add " << *I <<
+            " from trackinglist to worklist\n");
+      Worklist.add(I);
     }
     TrackingList.clear();
-    DeletedInstSet.clear();
   }
 
   Worklist.zap();
@@ -329,7 +329,6 @@ SILInstruction *SILCombiner::eraseInstFromFunction(SILInstruction &I,
 
   Worklist.remove(&I);
   eraseFromParentWithDebugInsts(&I, InstIter);
-  DeletedInstSet.insert(&I);
   MadeChange = true;
   return nullptr;  // Don't do anything with I
 }
@@ -342,23 +341,35 @@ namespace {
 
 class SILCombine : public SILFunctionTransform {
 
+  llvm::SmallVector<SILInstruction *, 64> TrackingList;
+  
   /// The entry point to the transformation.
   void run() override {
     auto *AA = PM->getAnalysis<AliasAnalysis>();
 
     // Create a SILBuilder with a tracking list for newly added
     // instructions, which we will periodically move to our worklist.
-    llvm::SmallVector<SILInstruction *, 64> TrackingList;
-
     SILBuilder B(*getFunction(), &TrackingList);
     SILCombiner Combiner(B, AA, getOptions().RemoveRuntimeAsserts);
     bool Changed = Combiner.runOnFunction(*getFunction());
+    assert(TrackingList.empty() &&
+           "TrackingList should be fully processed by SILCombiner");
 
     if (Changed) {
       // Invalidate everything.
       invalidateAnalysis(SILAnalysis::InvalidationKind::FunctionBody);
     }
   }
+  
+  virtual void handleDeleteNotification(ValueBase *I) override {
+    // Linear searching the tracking list doesn't hurt because usually it only
+    // contains a few elements.
+    auto Iter = std::find(TrackingList.begin(), TrackingList.end(), I);
+    if (Iter != TrackingList.end())
+      TrackingList.erase(Iter);      
+  }
+  
+  virtual bool needsNotifications() override { return true; }
 
   StringRef getName() override { return "SIL Combine"; }
 };
diff --git a/lib/SILOptimizer/SILCombiner/SILCombiner.h b/lib/SILOptimizer/SILCombiner/SILCombiner.h
index 2ba39649bb6ea..22112b0ee67f9 100644
--- a/lib/SILOptimizer/SILCombiner/SILCombiner.h
+++ b/lib/SILOptimizer/SILCombiner/SILCombiner.h
@@ -130,17 +130,13 @@ class SILCombiner :
   /// Builder used to insert instructions.
   SILBuilder &Builder;
 
-  /// A set of instructions which have been deleted during this iteration. It is
-  /// used to make sure that we do not
-  llvm::DenseSet<SILInstruction *> DeletedInstSet;
-
   /// Cast optimizer
   CastOptimizer CastOpt;
 
 public:
   SILCombiner(SILBuilder &B, AliasAnalysis *AA, bool removeCondFails)
       : AA(AA), Worklist(), MadeChange(false), RemoveCondFails(removeCondFails),
-        Iteration(0), Builder(B), DeletedInstSet(128),
+        Iteration(0), Builder(B),
         CastOpt(/* ReplaceInstUsesAction */
                 [&](SILInstruction *I, ValueBase * V) {
                   replaceInstUsesWith(*I, V);

From ba40b3f1a7d5b1c0b48ebe4bee03fe1d921c527c Mon Sep 17 00:00:00 2001
From: Xin Tong <xin_tong@apple.com>
Date: Mon, 4 Jan 2016 10:01:20 -0800
Subject: [PATCH 04/12] Take a more displined approach in DSE as to how to a
 function is optimized.

Now we have 3 cases.

1. OptimizeNone (for functions with too many basicblocks and too many locations). Simply return.
2. Pessimisitc single iteration data flow (for functions with many basic blocks and many locations).
3. Optimistic multiple iteration data flow (for functions with some basic blocks and some locations
   and require iterative data flow).

With this change stdlib and stdlibunittest has some changes in dead store(DS)
eliminated.

stdlib: 202 -> 203 DS.
stdlibunittest: 42 - 39 DS.

Compilation time improvement: with this change on a RELEASE+ASSERT compiler for stdlibunittest.

Running Time        Self (ms)               Symbol Name
5525.0ms    5.3%    25.0                     (anonymous namespace)::ARCSequenceOpts::run()
3500.0ms    3.4%    25.0                     (anonymous namespace)::RedundantLoadElimination::run()
3050.0ms    2.9%    25.0                     (anonymous namespace)::SILCombine::run()
2700.0ms    2.6%    0.0                      (anonymous namespace)::SimplifyCFGPass::run()
2100.0ms    2.0%    75.0                     (anonymous namespace)::SILCSE::run()
1450.0ms    1.4%    0.0                      (anonymous namespace)::DeadStoreElimination::run()
750.0ms    0.7%     75.0                     (anonymous namespace)::DCE::run()

Compilation time improvement: with this change on a DEBUG compiler for stdlibunittest.

Running Time        Self (ms)               Symbol Name
42300.0ms    4.9%   50.0                      (anonymous namespace)::ARCSequenceOpts::run()
35875.0ms    4.1%   0.0                       (anonymous namespace)::RedundantLoadElimination::run()
30475.0ms    3.5%   0.0                       (anonymous namespace)::SILCombine::run()
19675.0ms    2.3%   0.0                       (anonymous namespace)::SILCSE::run()
18150.0ms    2.1%   25.0                      (anonymous namespace)::SimplifyCFGPass::run()
12475.0ms    1.4%   0.0                       (anonymous namespace)::DeadStoreElimination::run()
5775.0ms    0.6%    0.0                       (anonymous namespace)::DCE::run()

I do not see a compilation time change in stdlib.

Existing tests ensure correctness.
---
 .../Transforms/DeadStoreElimination.cpp       | 84 +++++++++++--------
 1 file changed, 49 insertions(+), 35 deletions(-)

diff --git a/lib/SILOptimizer/Transforms/DeadStoreElimination.cpp b/lib/SILOptimizer/Transforms/DeadStoreElimination.cpp
index 44369c0600e64..99460f9f9272d 100644
--- a/lib/SILOptimizer/Transforms/DeadStoreElimination.cpp
+++ b/lib/SILOptimizer/Transforms/DeadStoreElimination.cpp
@@ -136,12 +136,18 @@ static bool isDeadStoreInertInstruction(SILInstruction *Inst) {
 
 namespace {
 
-// If there are too many locations in the function, we bail out.
-constexpr unsigned MaxLSLocationLimit = 2048;
-
-constexpr unsigned MaxOneIterationFunctionBBLimit = 32;
+/// If this function has too many basic blocks or too many locations, it may
+/// take a long time to compute the genset and killset. The number of memory
+/// behavior or alias query we need to do in worst case is roughly linear to
+/// # of BBs x(times) # of locations.
+///
+/// we could run DSE on functions with 256 basic blocks and 256 locations,
+/// which is a large function.  
+constexpr unsigned MaxLSLocationBBMultiplicationNone = 256*256;
 
-constexpr unsigned MaxOneIterationFunctionLSLocationLimit = 64;
+/// we could run optimsitic DSE on functions with less than 64 basic blocks
+/// and 64 locations which is a sizeable function.
+constexpr unsigned MaxLSLocationBBMultiplicationPessimistic = 64*64;
 
 /// If a large store is broken down to too many smaller stores, bail out.
 /// Currently, we only do partial dead store if we can form a single contiguous
@@ -234,7 +240,7 @@ class BlockState {
   void initReturnBlock(DSEContext &Ctx);
 
   /// Initialize the bitvectors for the current basic block.
-  void init(DSEContext &Ctx, bool OneIterationFunction);
+  void init(DSEContext &Ctx, bool PessimisticDF);
 
   /// Check whether the BBWriteSetIn has changed. If it does, we need to rerun
   /// the data flow on this block's predecessors to reach fixed point.
@@ -274,6 +280,12 @@ bool BlockState::isTrackingLocation(llvm::SmallBitVector &BV, unsigned i) {
 namespace {
 
 class DSEContext {
+  enum class ProcessKind {
+    ProcessOptimistic = 0,
+    ProcessPessimistic = 1,
+    ProcessNone = 2,
+  }; 
+private:
   /// The module we are currently processing.
   SILModule *Mod;
 
@@ -415,11 +427,11 @@ class DSEContext {
 
   /// Use a set of ad hoc rules to tell whether we should run a pessimistic
   /// one iteration data flow on the function.
-  bool isOneIterationFunction();
+  ProcessKind getProcessFunctionKind();
 
   /// Compute the kill set for the basic block. return true if the store set
   /// changes.
-  void processBasicBlockForDSE(SILBasicBlock *BB, bool OneIterationFunction);
+  void processBasicBlockForDSE(SILBasicBlock *BB, bool PessimisticDF);
 
   /// Compute the genset and killset for the current basic block.
   void processBasicBlockForGenKillSet(SILBasicBlock *BB);
@@ -451,7 +463,7 @@ void BlockState::initReturnBlock(DSEContext &Ctx) {
   }
 }
 
-void BlockState::init(DSEContext &Ctx, bool OneIterationFunction)  {
+void BlockState::init(DSEContext &Ctx, bool PessimisticDF)  {
   std::vector<LSLocation> &LV = Ctx.getLocationVault();
   LocationNum = LV.size();
   // For function that requires just 1 iteration of the data flow to converge
@@ -470,7 +482,7 @@ void BlockState::init(DSEContext &Ctx, bool OneIterationFunction)  {
   // However, by doing so, we can only eliminate the dead stores after the
   // data flow stabilizes.
   //
-  BBWriteSetIn.resize(LocationNum, !OneIterationFunction);
+  BBWriteSetIn.resize(LocationNum, !PessimisticDF);
   BBWriteSetOut.resize(LocationNum, false);
   BBWriteSetMid.resize(LocationNum, false);
 
@@ -498,9 +510,10 @@ unsigned DSEContext::getLocationBit(const LSLocation &Loc) {
   return Iter->second;
 }
 
-bool DSEContext::isOneIterationFunction() {
+DSEContext::ProcessKind DSEContext::getProcessFunctionKind() {
   bool RunOneIteration = true;
   unsigned BBCount = 0;
+  unsigned LocationCount = LocationVault.size();
 
   // If all basic blocks will have their successors processed if
   // the basic blocks in the functions are iterated in post order.
@@ -519,19 +532,20 @@ bool DSEContext::isOneIterationFunction() {
     HandledBBs.insert(B);
   }
 
-  // If this function has too many basic blocks or too many locations, it may
-  // take a long time to compute the genset and killset. The number of memory
-  // behavior or alias query we need to do in worst case is roughly linear to
-  // # of BBs x(times) # of locations.
-  //
-  // Instead, we run one pessimistic data flow to do dead store elimination on
+  // Data flow may take too long to run.
+  if (BBCount * LocationCount > MaxLSLocationBBMultiplicationNone)
+    return ProcessKind::ProcessNone;
+
+  // This function's data flow would converge in 1 iteration.
+  if (RunOneIteration)
+    return ProcessKind::ProcessPessimistic;
+  
+  // We run one pessimistic data flow to do dead store elimination on
   // the function.
-  if (BBCount > MaxOneIterationFunctionBBLimit)
-    RunOneIteration = true;
-  if (LocationVault.size() > MaxOneIterationFunctionLSLocationLimit)
-    RunOneIteration = true;
+  if (BBCount * LocationCount > MaxLSLocationBBMultiplicationPessimistic)
+    return ProcessKind::ProcessPessimistic;
 
-  return RunOneIteration;
+  return ProcessKind::ProcessOptimistic;
 }
 
 void DSEContext::processBasicBlockForGenKillSet(SILBasicBlock *BB) {
@@ -587,13 +601,13 @@ bool DSEContext::processBasicBlockWithGenKillSet(SILBasicBlock *BB) {
 }
 
 void DSEContext::processBasicBlockForDSE(SILBasicBlock *BB,
-                                         bool OneIterationFunction) {
+                                         bool PessimisticDF) {
   // If we know this is not a one iteration function which means its
   // its BBWriteSetIn and BBWriteSetOut have been computed and converged, 
   // and this basic block does not even have StoreInsts, there is no point
   // in processing every instruction in the basic block again as no store
   // will be eliminated. 
-  if (!OneIterationFunction && BBWithStores.find(BB) == BBWithStores.end())
+  if (!PessimisticDF && BBWithStores.find(BB) == BBWithStores.end())
        return;
 
   // Intersect in the successor WriteSetIns. A store is dead if it is not read
@@ -1067,15 +1081,15 @@ bool DSEContext::run() {
   // this function.
   LSLocation::enumerateLSLocations(*F, LocationVault, LocToBitIndex, TE);
 
-  // Do we really need to run the iterative data flow on the function.
-  //
-  // Also check whether this function meets other criteria for pessimistic
-  // one iteration data flow.
-  bool OneIterationFunction = isOneIterationFunction();
+  // Check how to optimize this function.
+  ProcessKind Kind = getProcessFunctionKind();
+  
+  // We do not optimize this function at all.
+  if (Kind == ProcessKind::ProcessNone)
+      return false;
 
-  // Data flow may take too long to converge.
-  if (LocationVault.size() > MaxLSLocationLimit)
-    return false;
+  // Do we run a pessimistic data flow ?
+  bool PessimisticDF = Kind == ProcessKind::ProcessOptimistic ? false : true;
 
   // For all basic blocks in the function, initialize a BB state.
   //
@@ -1086,7 +1100,7 @@ bool DSEContext::run() {
     BlockStates.push_back(BlockState(&B));
     // Since we know all the locations accessed in this function, we can resize
     // the bit vector to the appropriate size.
-    BlockStates.back().init(*this, OneIterationFunction);
+    BlockStates.back().init(*this, PessimisticDF);
   }
 
   // Initialize the BBToLocState mapping.
@@ -1112,14 +1126,14 @@ bool DSEContext::run() {
   // on the function.
 
   // We need to run the iterative data flow on the function.
-  if (!OneIterationFunction) {
+  if (!PessimisticDF) {
     runIterativeDSE();
   }
 
   // The data flow has stabilized, run one last iteration over all the basic
   // blocks and try to remove dead stores.
   for (SILBasicBlock *B : PO->getPostOrder()) {
-    processBasicBlockForDSE(B, OneIterationFunction);
+    processBasicBlockForDSE(B, PessimisticDF);
   }
 
   // Finally, delete the dead stores and create the live stores.

From f61893595d8c65a32b0365e287459527087b6c6b Mon Sep 17 00:00:00 2001
From: Doug Gregor <dgregor@apple.com>
Date: Mon, 4 Jan 2016 10:08:15 -0800
Subject: [PATCH 05/12] [Omit needless words] Don't ask for the StringRef of an
 empty identifier.

---
 lib/Sema/MiscDiagnostics.cpp        | 2 +-
 test/Sema/omit_needless_words.swift | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/lib/Sema/MiscDiagnostics.cpp b/lib/Sema/MiscDiagnostics.cpp
index 184b446079c04..7f5f003acb432 100644
--- a/lib/Sema/MiscDiagnostics.cpp
+++ b/lib/Sema/MiscDiagnostics.cpp
@@ -2068,7 +2068,7 @@ static Optional<DeclName> omitNeedlessWords(AbstractFunctionDecl *afd) {
   // Figure out the first parameter name.
   StringRef firstParamName;
   auto params = afd->getParameterList(afd->getImplicitSelfDecl() ? 1 : 0);
-  if (params->size() != 0)
+  if (params->size() != 0 && !params->get(0)->getName().empty())
     firstParamName = params->get(0)->getName().str();
 
   // Find the set of property names.
diff --git a/test/Sema/omit_needless_words.swift b/test/Sema/omit_needless_words.swift
index 51199d7ac759c..5c4c6bf692005 100644
--- a/test/Sema/omit_needless_words.swift
+++ b/test/Sema/omit_needless_words.swift
@@ -30,3 +30,5 @@ extension String {
 class NSArray {
   func arrayByAddingObject(x: AnyObject) -> NSArray { return NSArray() } // expected-warning{{'arrayByAddingObject' could be named 'adding' [-Womit-needless-words]}}{{8-27=adding}}
 }
+
+func emptyFirstParamName(_: Int) { }

From 94c8a59067be32a80115d1318dbdae6d5550eafe Mon Sep 17 00:00:00 2001
From: Brian Gesiak <modocache@gmail.com>
Date: Fri, 18 Dec 2015 02:00:51 -0500
Subject: [PATCH 06/12] [cmpcodesize] Extract otool subprocess calls

The functions in `cmpcodesize.compare` do several things: they call
otool, they use regex matchers to extract information from otool's
output, and they output that information using `print()`.

Currently, the only way to test cmpcodesize is via functional tests:
ones that actually run on a dylib like libswiftCore.dylib. This takes a
lot of time and is difficult to fully automate.

By extracting otool calls from `cmpcodesize.compare`, we can test those
in isolation. Furthermore, future commits can test the functions in
`cmpcodesize.compare` using canned strings, instead of actual otool
output.
---
 utils/cmpcodesize/cmpcodesize/compare.py | 26 +++------
 utils/cmpcodesize/cmpcodesize/otool.py   | 45 +++++++++++++++
 utils/cmpcodesize/tests/test_otool.py    | 73 ++++++++++++++++++++++++
 3 files changed, 126 insertions(+), 18 deletions(-)
 create mode 100644 utils/cmpcodesize/cmpcodesize/otool.py
 create mode 100644 utils/cmpcodesize/tests/test_otool.py

diff --git a/utils/cmpcodesize/cmpcodesize/compare.py b/utils/cmpcodesize/cmpcodesize/compare.py
index f511db62b2dd7..5f8c39ee1cb51 100644
--- a/utils/cmpcodesize/cmpcodesize/compare.py
+++ b/utils/cmpcodesize/cmpcodesize/compare.py
@@ -12,10 +12,11 @@
 
 import re
 import os
-import subprocess
 import collections
 from operator import itemgetter
 
+from cmpcodesize import otool
+
 Prefixes = {
     # Cpp
     "__Z" : "CPP",
@@ -76,19 +77,10 @@ def addFunction(sizes, function, startAddr, endAddr, groupByPrefix):
         sizes[function] += size
 
 
-def flatten(*args):
-    for x in args:
-        if hasattr(x, '__iter__'):
-            for y in flatten(*x):
-                yield y
-        else:
-            yield x
-
-
 def readSizes(sizes, fileName, functionDetails, groupByPrefix):
     # Check if multiple architectures are supported by the object file.
     # Prefer arm64 if available.
-    architectures = subprocess.check_output(["otool", "-V", "-f", fileName]).split("\n")
+    architectures = otool.fat_headers(fileName).split('\n')
     arch = None
     archPattern = re.compile('architecture ([\S]+)')
     for architecture in architectures:
@@ -98,16 +90,14 @@ def readSizes(sizes, fileName, functionDetails, groupByPrefix):
                 arch = archMatch.group(1)
             if "arm64" in arch:
                 arch = "arm64"
-    if arch is not None:
-      archParams = ["-arch", arch]
-    else:
-      archParams = []
 
     if functionDetails:
-        content = subprocess.check_output(flatten(["otool", archParams, "-l", "-v", "-t", fileName])).split("\n")
-        content += subprocess.check_output(flatten(["otool", archParams, "-v", "-s", "__TEXT", "__textcoal_nt", fileName])).split("\n")
+        content = otool.load_commands(fileName,
+                                      architecture=arch,
+                                      include_text_sections=True).split('\n')
+        content += otool.text_sections(fileName, architecture=arch).split('\n')
     else:
-        content = subprocess.check_output(flatten(["otool", archParams, "-l", fileName])).split("\n")
+        content = otool.load_commands(fileName, architecture=arch).split('\n')
 
     sectName = None
     currFunc = None
diff --git a/utils/cmpcodesize/cmpcodesize/otool.py b/utils/cmpcodesize/cmpcodesize/otool.py
new file mode 100644
index 0000000000000..76d674287d328
--- /dev/null
+++ b/utils/cmpcodesize/cmpcodesize/otool.py
@@ -0,0 +1,45 @@
+import subprocess
+
+
+def _command_for_architecture(architecture=None):
+    if architecture is None:
+        return ['otool']
+    else:
+        return ['otool', '-arch', architecture]
+
+
+def fat_headers(path):
+    """
+    Returns the headers for the executable at the given path.
+    Raises a subprocess.CalledProcessError if otool encounters an error,
+    such as not finding a file at the given path.
+    """
+    return subprocess.check_output(['otool', '-V', '-f', path])
+
+
+def load_commands(path, architecture=None, include_text_sections=False):
+    """
+    Returns the load commands for the executable at the given path,
+    for the given architecture. If print_text_section is specified,
+    the disassembled text section of the load commands is also output.
+
+    Raises a subprocess.CalledProcessError if otool encounters an error,
+    such as not finding a file at the given path.
+    """
+    command = _command_for_architecture(architecture) + ['-l']
+    if include_text_sections:
+        command += ['-v', '-t']
+    return subprocess.check_output(command + [path])
+
+
+def text_sections(path, architecture=None):
+    """
+    Returns the contents of the text sections of the executable at the
+    given path, for the given architecture.
+
+    Raises a subprocess.CalledProcessError if otool encounters an error,
+    such as not finding a file at the given path.
+    """
+    return subprocess.check_output(
+        _command_for_architecture(architecture) +
+        ['-v', '-s', '__TEXT', '__textcoal_nt', path])
diff --git a/utils/cmpcodesize/tests/test_otool.py b/utils/cmpcodesize/tests/test_otool.py
new file mode 100644
index 0000000000000..abc086a4dc601
--- /dev/null
+++ b/utils/cmpcodesize/tests/test_otool.py
@@ -0,0 +1,73 @@
+import subprocess
+import unittest
+
+from cmpcodesize import otool
+
+
+# Store parameters passed to subprocess.check_output into
+# this global variable.
+_subprocess_check_output_arguments = []
+
+
+# We'll monkey-patch subprocess.check_output with this stub
+# function, which simply records whatever's passed to
+# check_output into the global variables above.
+def _stub_subprocess_check_output(arguments, *args, **kwargs):
+    global _subprocess_check_output_arguments
+    _subprocess_check_output_arguments = arguments
+
+
+class OtoolTestCase(unittest.TestCase):
+    def setUp(self):
+        # Monkey-patch subprocess.check_output with our stub function.
+        self._original_check_output = subprocess.check_output
+        subprocess.check_output = _stub_subprocess_check_output
+
+    def tearDown(self):
+        # Undo the monkey-patching.
+        subprocess.check_output = self._original_check_output
+
+    def test_fat_headers(self):
+        otool.fat_headers('/path/to/foo')
+        self.assertEqual(_subprocess_check_output_arguments,
+                         ['otool', '-V', '-f', '/path/to/foo'])
+
+    def test_load_commands_with_no_architecture(self):
+        otool.load_commands('/path/to/bar')
+        self.assertEqual(_subprocess_check_output_arguments,
+                         ['otool', '-l', '/path/to/bar'])
+
+    def test_load_commands_with_architecture(self):
+        otool.load_commands('/path/to/baz', architecture='arch-foo')
+        self.assertEqual(
+            _subprocess_check_output_arguments,
+            ['otool', '-arch', 'arch-foo', '-l', '/path/to/baz'])
+
+    def test_load_commands_no_architecture_but_including_text_sections(self):
+        otool.load_commands(
+            '/path/to/flim', include_text_sections=True)
+        self.assertEqual(
+            _subprocess_check_output_arguments,
+            ['otool', '-l', '-v', '-t', '/path/to/flim'])
+
+    def test_load_commands_with_architecture_and_including_text_sections(self):
+        otool.load_commands(
+            '/path/to/flam',
+            architecture='arch-bar',
+            include_text_sections=True)
+        self.assertEqual(
+            _subprocess_check_output_arguments,
+            ['otool', '-arch', 'arch-bar', '-l', '-v', '-t', '/path/to/flam'])
+
+    def test_text_sections_no_architecture(self):
+        otool.text_sections('/path/to/fish')
+        self.assertEqual(
+            _subprocess_check_output_arguments,
+            ['otool', '-v', '-s', '__TEXT', '__textcoal_nt', '/path/to/fish'])
+
+    def test_text_sections_with_architecture(self):
+        otool.text_sections('/path/to/frosh', architecture='arch-baz')
+        self.assertEqual(
+            _subprocess_check_output_arguments,
+            ['otool', '-arch', 'arch-baz', '-v', '-s',
+             '__TEXT', '__textcoal_nt', '/path/to/frosh'])

From cfe77467cf7a0ff8c348f6c563523f7322587dd3 Mon Sep 17 00:00:00 2001
From: Brian Gesiak <modocache@gmail.com>
Date: Fri, 18 Dec 2015 02:53:15 -0500
Subject: [PATCH 07/12] [cmpcodesize] Begin extracting regular expressions

`cmpcodesize.compare` has a few responsibilities: matching regular
expressions on otool output, and storing the results of those matches
in dictionaries.

Begin extracting the regular expression matching into a separate file,
`cmpcodesize/regex.py`. This makes the code more modular, and allows
for finer-grained unit tests.
---
 utils/cmpcodesize/cmpcodesize/compare.py | 24 ++++++--------
 utils/cmpcodesize/cmpcodesize/regex.py   | 26 +++++++++++++++
 utils/cmpcodesize/tests/test_regex.py    | 41 ++++++++++++++++++++++++
 3 files changed, 76 insertions(+), 15 deletions(-)
 create mode 100644 utils/cmpcodesize/cmpcodesize/regex.py
 create mode 100644 utils/cmpcodesize/tests/test_regex.py

diff --git a/utils/cmpcodesize/cmpcodesize/compare.py b/utils/cmpcodesize/cmpcodesize/compare.py
index 5f8c39ee1cb51..0ed433f057a30 100644
--- a/utils/cmpcodesize/cmpcodesize/compare.py
+++ b/utils/cmpcodesize/cmpcodesize/compare.py
@@ -15,7 +15,7 @@
 import collections
 from operator import itemgetter
 
-from cmpcodesize import otool
+from cmpcodesize import otool, regex
 
 Prefixes = {
     # Cpp
@@ -80,30 +80,24 @@ def addFunction(sizes, function, startAddr, endAddr, groupByPrefix):
 def readSizes(sizes, fileName, functionDetails, groupByPrefix):
     # Check if multiple architectures are supported by the object file.
     # Prefer arm64 if available.
-    architectures = otool.fat_headers(fileName).split('\n')
-    arch = None
-    archPattern = re.compile('architecture ([\S]+)')
-    for architecture in architectures:
-        archMatch = archPattern.match(architecture)
-        if archMatch:
-            if arch is None:
-                arch = archMatch.group(1)
-            if "arm64" in arch:
-                arch = "arm64"
-
+    fat_headers = otool.fat_headers(fileName)
+    architecture = regex.architecture(fat_headers)
     if functionDetails:
         content = otool.load_commands(fileName,
-                                      architecture=arch,
+                                      architecture=architecture,
                                       include_text_sections=True).split('\n')
-        content += otool.text_sections(fileName, architecture=arch).split('\n')
+        content += otool.text_sections(fileName,
+                                       architecture=architecture).split('\n')
     else:
-        content = otool.load_commands(fileName, architecture=arch).split('\n')
+        content = otool.load_commands(fileName,
+                                      architecture=architecture).split('\n')
 
     sectName = None
     currFunc = None
     startAddr = None
     endAddr = None
 
+    # FIXME: Move re calls into cmpcodesize.regex module.
     sectionPattern = re.compile(' +sectname ([\S]+)')
     sizePattern = re.compile(' +size ([\da-fx]+)')
     asmlinePattern = re.compile('^([0-9a-fA-F]+)\s')
diff --git a/utils/cmpcodesize/cmpcodesize/regex.py b/utils/cmpcodesize/cmpcodesize/regex.py
new file mode 100644
index 0000000000000..d6bd362009a34
--- /dev/null
+++ b/utils/cmpcodesize/cmpcodesize/regex.py
@@ -0,0 +1,26 @@
+import re
+
+
+# Cache the compiled regex into a global object.
+_ARCHITECTURE_REGEX = re.compile('architecture (\S+)')
+
+
+def architecture(fat_headers):
+    """
+    Given a string representing fat headers from an executable,
+    returns one of the following:
+
+    1. arm64, if that is one of the architectures listed.
+    2. If arm64 us not listed, the first architecture that is listed.
+    3. None, if no architectures are listed.
+    """
+    result = None
+    for line in fat_headers.splitlines():
+        match = _ARCHITECTURE_REGEX.match(line)
+        if match:
+            arch = match.group(1)
+            if arch == 'arm64':
+                return arch
+            elif result is None:
+                result = match.group(1)
+    return result
diff --git a/utils/cmpcodesize/tests/test_regex.py b/utils/cmpcodesize/tests/test_regex.py
new file mode 100644
index 0000000000000..73c9d384f45e0
--- /dev/null
+++ b/utils/cmpcodesize/tests/test_regex.py
@@ -0,0 +1,41 @@
+import unittest
+
+from cmpcodesize import regex
+
+
+class ArchitectureTestCase(unittest.TestCase):
+    def test_no_architectures_listed_returns_none(self):
+        headers = 'Fat headers\n' + \
+                  '    cputype CPU_TYPE_X86_64\n'
+        self.assertIsNone(regex.architecture(headers))
+
+    def test_arm64_listed_returns_arm64(self):
+        headers = 'Fat headers\n' + \
+                  'architecture x86_64\n' + \
+                  '    cputype CPU_TYPE_X86_64\n' + \
+                  'architecture arm64\n' + \
+                  '    cputype CPU_TYPE_ARM64\n'
+        self.assertEquals(regex.architecture(headers), 'arm64')
+
+    def test_arm64_not_listed_returns_first(self):
+        headers = 'Fat headers\n' + \
+                  'architecture x86_64\n' + \
+                  '    cputype CPU_TYPE_X86_64\n' + \
+                  'architecture i386\n' + \
+                  '    cputype CPU_TYPE_I386\n'
+        self.assertEquals(regex.architecture(headers), 'x86_64')
+
+    def test_libswiftcore_x86_64(self):
+        # These are the headers for libswiftCore.dylib when built
+        # for a Darwin x86_64 target.
+        headers = 'Fat headers\n' + \
+                  'fat_magic FAT_MAGIC\n' + \
+                  'nfat_arch 1\n' + \
+                  'architecture x86_64\n' + \
+                  '    cputype CPU_TYPE_X86_64\n' + \
+                  '    cpusubtype CPU_SUBTYPE_X86_64_ALL\n' + \
+                  '    capabilities 0x0\n' + \
+                  '    offset 4096\n' + \
+                  '    size 9029488\n' + \
+                  '    align 2^12 (4096)\n'
+        self.assertEquals(regex.architecture(headers), 'x86_64')

From 365d9bdffa5db1729b38c83fb4e1ab9d8419ffed Mon Sep 17 00:00:00 2001
From: Brian Gesiak <modocache@gmail.com>
Date: Sat, 19 Dec 2015 01:41:41 -0500
Subject: [PATCH 08/12] [cmpcodesize] Fix early return in --list

Fixes a bug, introduced in a1f6040c83d62a3ec354bba398f902b66154ec41,
in which `cmpcodesize -l /path/to/file` would only output the *first* label,
instead of listing all of them.
---
 utils/cmpcodesize/cmpcodesize/compare.py      |  2 +-
 utils/cmpcodesize/cmpcodesize/main.py         |  2 +-
 .../tests/test_list_function_sizes.py         | 19 +++++++++++++++----
 3 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/utils/cmpcodesize/cmpcodesize/compare.py b/utils/cmpcodesize/cmpcodesize/compare.py
index 0ed433f057a30..7aa8acc2db1c2 100644
--- a/utils/cmpcodesize/cmpcodesize/compare.py
+++ b/utils/cmpcodesize/cmpcodesize/compare.py
@@ -188,7 +188,7 @@ def listFunctionSizes(sizeArray):
     for pair in sorted(sizeArray, key=itemgetter(1)):
         name = pair[0]
         size = pair[1]
-        return "%8d %s" % (size, name)
+        yield "%8d %s" % (size, name)
 
 
 def compareFunctionSizes(oldFiles, newFiles):
diff --git a/utils/cmpcodesize/cmpcodesize/main.py b/utils/cmpcodesize/cmpcodesize/main.py
index d3d7271c9d1b6..b2fb49c5f028b 100644
--- a/utils/cmpcodesize/cmpcodesize/main.py
+++ b/utils/cmpcodesize/cmpcodesize/main.py
@@ -177,7 +177,7 @@ def main():
             sizes = collections.defaultdict(int)
             for file in oldFiles:
                 readSizes(sizes, file, True, False)
-            print(listFunctionSizes(sizes.items()))
+            print(os.linesep.join(listFunctionSizes(sizes.items())))
         else:
             compareFunctionSizes(oldFiles, newFiles)
     else:
diff --git a/utils/cmpcodesize/tests/test_list_function_sizes.py b/utils/cmpcodesize/tests/test_list_function_sizes.py
index e697a9ee05c8e..e0c8f2691873d 100644
--- a/utils/cmpcodesize/tests/test_list_function_sizes.py
+++ b/utils/cmpcodesize/tests/test_list_function_sizes.py
@@ -14,13 +14,24 @@
 
 
 class ListFunctionSizesTestCase(unittest.TestCase):
-    def test_when_size_array_is_none_raises(self):
+    def test_when_sizes_is_none_raises(self):
         with self.assertRaises(TypeError):
-            listFunctionSizes(None)
+            list(listFunctionSizes(None))
 
-    def test_when_size_array_is_empty_returns_none(self):
-        self.assertIsNone(listFunctionSizes([]))
+    def test_when_sizes_is_empty_returns_none(self):
+        self.assertEqual(list(listFunctionSizes([])), [])
 
+    def test_lists_each_entry(self):
+        sizes = {
+            'foo': 1,
+            'bar': 10,
+            'baz': 100,
+        }
+        self.assertEqual(list(listFunctionSizes(sizes.items())), [
+            '       1 foo',
+            '      10 bar',
+            '     100 baz',
+        ])
 
 if __name__ == '__main__':
     unittest.main()

From 6001ddd696ce4647b28da60a42e63729b03c5f6b Mon Sep 17 00:00:00 2001
From: Brian Gesiak <modocache@gmail.com>
Date: Tue, 29 Dec 2015 23:04:07 -0500
Subject: [PATCH 09/12] Finish moving regexes

---
 utils/cmpcodesize/cmpcodesize/compare.py | 52 ++++++++-----------
 utils/cmpcodesize/cmpcodesize/regex.py   | 66 +++++++++++++++++++++++-
 utils/cmpcodesize/tests/test_regex.py    | 41 +++++++++++++--
 3 files changed, 123 insertions(+), 36 deletions(-)

diff --git a/utils/cmpcodesize/cmpcodesize/compare.py b/utils/cmpcodesize/cmpcodesize/compare.py
index 7aa8acc2db1c2..b6738c8b3c505 100644
--- a/utils/cmpcodesize/cmpcodesize/compare.py
+++ b/utils/cmpcodesize/cmpcodesize/compare.py
@@ -83,52 +83,42 @@ def readSizes(sizes, fileName, functionDetails, groupByPrefix):
     fat_headers = otool.fat_headers(fileName)
     architecture = regex.architecture(fat_headers)
     if functionDetails:
-        content = otool.load_commands(fileName,
-                                      architecture=architecture,
-                                      include_text_sections=True).split('\n')
-        content += otool.text_sections(fileName,
-                                       architecture=architecture).split('\n')
+        content = otool.load_commands(fileName, architecture=architecture,
+                                      include_text_sections=True)
+        content += otool.text_sections(fileName, architecture=architecture)
     else:
-        content = otool.load_commands(fileName,
-                                      architecture=architecture).split('\n')
+        content = otool.load_commands(fileName, architecture=architecture)
 
     sectName = None
     currFunc = None
     startAddr = None
     endAddr = None
 
-    # FIXME: Move re calls into cmpcodesize.regex module.
-    sectionPattern = re.compile(' +sectname ([\S]+)')
-    sizePattern = re.compile(' +size ([\da-fx]+)')
-    asmlinePattern = re.compile('^([0-9a-fA-F]+)\s')
-    labelPattern = re.compile('^((\-*\[[^\]]*\])|[^\/\s]+):$')
-
-    for line in content:
-        asmlineMatch = asmlinePattern.match(line)
-        if asmlineMatch:
-            addr = int(asmlineMatch.group(1), 16)
+    for line in content.splitlines():
+        asm = regex.address(line)
+        if asm:
             if startAddr is None:
-                startAddr = addr
-            endAddr = addr
-        elif line == "Section":
+                startAddr = asm
+            endAddr = asm
+        elif line == 'Section':
             sectName = None
         else:
-            labelMatch = labelPattern.match(line)
-            sizeMatch = sizePattern.match(line)
-            sectionMatch = sectionPattern.match(line)
-            if labelMatch:
-                funcName = labelMatch.group(1)
+            label = regex.label(line)
+            size = regex.size(line)
+            section = regex.section(line)
+            if label:
+                funcName = label
                 addFunction(sizes, currFunc, startAddr, endAddr, groupByPrefix)
                 currFunc = funcName
                 startAddr = None
                 endAddr = None
-            elif sizeMatch and sectName and groupByPrefix:
-                size = int(sizeMatch.group(1), 16)
+            elif size and sectName and groupByPrefix:
+                print(line)
                 sizes[sectName] += size
-            elif sectionMatch:
-                sectName = sectionMatch.group(1)
-                if sectName == "__textcoal_nt":
-                    sectName = "__text"
+            elif section:
+                sectName = section
+                if sectName == '__textcoal_nt':
+                    sectName = '__text'
 
     addFunction(sizes, currFunc, startAddr, endAddr, groupByPrefix)
 
diff --git a/utils/cmpcodesize/cmpcodesize/regex.py b/utils/cmpcodesize/cmpcodesize/regex.py
index d6bd362009a34..b7e4c5ad6bfea 100644
--- a/utils/cmpcodesize/cmpcodesize/regex.py
+++ b/utils/cmpcodesize/cmpcodesize/regex.py
@@ -1,8 +1,12 @@
 import re
 
 
-# Cache the compiled regex into a global object.
-_ARCHITECTURE_REGEX = re.compile('architecture (\S+)')
+# Cache the compiled regular expressions into global objects.
+_ARCHITECTURE_REGEX = re.compile('architecture\s(\S+)')
+_SECTION_REGEX = re.compile('\s+sectname\s(\S+)')
+_SIZE_REGEX = re.compile('\s+size\s([\da-fA-Fx]+)')
+_ADDRESS_REGEX = re.compile('^([\da-fA-F]+)\s')
+_LABEL_REGEX = re.compile('^((\-*\[[^\]]*\])|[^\/\s]+):$')
 
 
 def architecture(fat_headers):
@@ -24,3 +28,61 @@ def architecture(fat_headers):
             elif result is None:
                 result = match.group(1)
     return result
+
+
+def section(line):
+    """
+    Given a string representing a line from a series of load commands,
+    returns the section name if specified. For example, the line
+    '  sectname __common' returns '__common'. If the line does not contain
+    a section name, returns None.
+    """
+    match = _SECTION_REGEX.match(line)
+    if match:
+        return match.group(1)
+    else:
+        return None
+
+
+def size(line):
+    """
+    Given a string representing a line from a series of load commands,
+    returns an integer representing the size, if the size appears in the line.
+    For example, the line '      size 0x0000000000000810' returns 2064.
+    If no size appears in the line, returns None.
+    """
+    match = _SIZE_REGEX.match(line)
+    if match:
+        return int(match.group(1), 16)
+    else:
+        return None
+
+
+def address(line):
+    """
+    Given a string representing a line from a series of load commands,
+    returns an integer representing the address of the command, if the address
+    appears in the line. For example, the line
+    '0000000000051e0a    movq    -0xe8(%rbp), %r10' returns 335370.
+    If no address appears in the line, returns None.
+    """
+    match = _ADDRESS_REGEX.match(line)
+    if match:
+        return int(match.group(1), 16)
+    else:
+        return None
+
+
+def label(line):
+    """
+    Given a string representing a line from a series of load commands,
+    returns the label marking the beginning of a section of commands, if the
+    label appears in the line. For example, the line
+    '_swift_stdlib_readLine_stdin:' returns '_swift_stdlib_readLine_stdin'.
+    If no label appears in the line, returns None.
+    """
+    match = _LABEL_REGEX.match(line)
+    if match:
+        return match.group(1)
+    else:
+        return None
diff --git a/utils/cmpcodesize/tests/test_regex.py b/utils/cmpcodesize/tests/test_regex.py
index 73c9d384f45e0..cdcc95dc6b026 100644
--- a/utils/cmpcodesize/tests/test_regex.py
+++ b/utils/cmpcodesize/tests/test_regex.py
@@ -15,7 +15,7 @@ def test_arm64_listed_returns_arm64(self):
                   '    cputype CPU_TYPE_X86_64\n' + \
                   'architecture arm64\n' + \
                   '    cputype CPU_TYPE_ARM64\n'
-        self.assertEquals(regex.architecture(headers), 'arm64')
+        self.assertEqual(regex.architecture(headers), 'arm64')
 
     def test_arm64_not_listed_returns_first(self):
         headers = 'Fat headers\n' + \
@@ -23,7 +23,7 @@ def test_arm64_not_listed_returns_first(self):
                   '    cputype CPU_TYPE_X86_64\n' + \
                   'architecture i386\n' + \
                   '    cputype CPU_TYPE_I386\n'
-        self.assertEquals(regex.architecture(headers), 'x86_64')
+        self.assertEqual(regex.architecture(headers), 'x86_64')
 
     def test_libswiftcore_x86_64(self):
         # These are the headers for libswiftCore.dylib when built
@@ -38,4 +38,39 @@ def test_libswiftcore_x86_64(self):
                   '    offset 4096\n' + \
                   '    size 9029488\n' + \
                   '    align 2^12 (4096)\n'
-        self.assertEquals(regex.architecture(headers), 'x86_64')
+        self.assertEqual(regex.architecture(headers), 'x86_64')
+
+
+class SectionTestCase(unittest.TestCase):
+    def test_no_section_included_returns_none(self):
+        self.assertIsNone(regex.section('size 0x0001'))
+
+    def test_section_included_returns_section(self):
+        self.assertEqual(regex.section('  sectname __data'), '__data')
+
+
+class SizeTestCase(unittest.TestCase):
+    def test_no_size_included_returns_none(self):
+        self.assertIsNone(regex.size('  sectname __objc_data'))
+
+    def test_size_included_returns_section(self):
+        self.assertEqual(regex.size('      size 0x0000000000000768'),
+                         1896)
+
+
+class AddressTestCase(unittest.TestCase):
+    def test_no_address_included_returns_none(self):
+        self.assertIsNone(regex.address('size 0x0001'))
+
+    def test_address_included_returns_section(self):
+        self.assertEqual(regex.address('00000000001fa370    callq   0x47a8c0'),
+                         2073456)
+
+
+class LabelTestCase(unittest.TestCase):
+    def test_no_label_included_returns_none(self):
+        self.assertIsNone(regex.label('size 0x0001'))
+
+    def test_label_included_returns_section(self):
+        self.assertEqual(regex.label('__ZNK4llvm9StringRef5splitEc:'),
+                         '__ZNK4llvm9StringRef5splitEc')

From 085646608a81f802b758b168dd8a0f8923003153 Mon Sep 17 00:00:00 2001
From: Brian Gesiak <modocache@gmail.com>
Date: Wed, 30 Dec 2015 00:58:34 -0500
Subject: [PATCH 10/12] use parser

---
 utils/cmpcodesize/cmpcodesize/compare.py      |  47 +--
 utils/cmpcodesize/cmpcodesize/parser.py       |  87 +++++
 .../fixtures/test_parser_test_fixture.txt     | 357 ++++++++++++++++++
 utils/cmpcodesize/tests/test_parser.py        |  32 ++
 4 files changed, 489 insertions(+), 34 deletions(-)
 create mode 100644 utils/cmpcodesize/cmpcodesize/parser.py
 create mode 100644 utils/cmpcodesize/tests/fixtures/test_parser_test_fixture.txt
 create mode 100644 utils/cmpcodesize/tests/test_parser.py

diff --git a/utils/cmpcodesize/cmpcodesize/compare.py b/utils/cmpcodesize/cmpcodesize/compare.py
index b6738c8b3c505..03049a27c2be2 100644
--- a/utils/cmpcodesize/cmpcodesize/compare.py
+++ b/utils/cmpcodesize/cmpcodesize/compare.py
@@ -15,7 +15,7 @@
 import collections
 from operator import itemgetter
 
-from cmpcodesize import otool, regex
+from cmpcodesize import otool, parser, regex
 
 Prefixes = {
     # Cpp
@@ -78,8 +78,11 @@ def addFunction(sizes, function, startAddr, endAddr, groupByPrefix):
 
 
 def readSizes(sizes, fileName, functionDetails, groupByPrefix):
-    # Check if multiple architectures are supported by the object file.
-    # Prefer arm64 if available.
+    """
+    Run 'otool' on the Mach-O file with the given 'fileName'. Read the regions
+    of the output (sections and labels) and store their sizes in the 'sizes'
+    dict.
+    """
     fat_headers = otool.fat_headers(fileName)
     architecture = regex.architecture(fat_headers)
     if functionDetails:
@@ -89,38 +92,14 @@ def readSizes(sizes, fileName, functionDetails, groupByPrefix):
     else:
         content = otool.load_commands(fileName, architecture=architecture)
 
-    sectName = None
-    currFunc = None
-    startAddr = None
-    endAddr = None
-
-    for line in content.splitlines():
-        asm = regex.address(line)
-        if asm:
-            if startAddr is None:
-                startAddr = asm
-            endAddr = asm
-        elif line == 'Section':
-            sectName = None
+    for region in parser.parse(content):
+        if isinstance(region, parser.MachOSection):
+            if region.name == '__textcoal_nt':
+                region.name = '__text'
+            sizes[region.name] += region.size
         else:
-            label = regex.label(line)
-            size = regex.size(line)
-            section = regex.section(line)
-            if label:
-                funcName = label
-                addFunction(sizes, currFunc, startAddr, endAddr, groupByPrefix)
-                currFunc = funcName
-                startAddr = None
-                endAddr = None
-            elif size and sectName and groupByPrefix:
-                print(line)
-                sizes[sectName] += size
-            elif section:
-                sectName = section
-                if sectName == '__textcoal_nt':
-                    sectName = '__text'
-
-    addFunction(sizes, currFunc, startAddr, endAddr, groupByPrefix)
+            addFunction(sizes, region.name, region.start_address,
+                        region.end_address, groupByPrefix)
 
 
 def compareSizes(oldSizes, newSizes, nameKey, title):
diff --git a/utils/cmpcodesize/cmpcodesize/parser.py b/utils/cmpcodesize/cmpcodesize/parser.py
new file mode 100644
index 0000000000000..4128c55aa4311
--- /dev/null
+++ b/utils/cmpcodesize/cmpcodesize/parser.py
@@ -0,0 +1,87 @@
+from __future__ import absolute_import
+
+import collections
+
+from . import regex
+
+
+MachOLabel = collections.namedtuple('MachOLabel',
+                                    'name start_address end_address')
+MachOSection = collections.namedtuple('MachOSection', 'name size')
+
+
+def parse(content):
+    """
+    Enumerates each line of the content, yielding objects that represent the
+    span of assembly for sections and labels.
+    """
+    # We enumerate each line of the content, yielding objects
+    # that represent the span of assembly for sections...
+    current_section_name = None
+    current_section_size = 0
+    # ...as well as for labels.
+    current_label_name = None
+    current_label_start_address = None
+    # Throughout the process we'll keep track of our current address.
+    current_address = None
+
+    for line in content.splitlines():
+        # Each line falls into one of these four categories.
+        address = regex.address(line)
+        label = regex.label(line)
+        size = regex.size(line)
+        section = regex.section(line)
+
+        if address is not None:
+            # We're reading more assembly for the current label or section.
+            # Update our current address.
+            current_address = address
+            # If we haven't yet recorded a start address for this label,
+            # use the current address.
+            if current_label_start_address is None:
+                current_label_start_address = address
+
+        elif section is not None:
+            # We're entering a new section. If we're currently measuring the
+            # span of a section, we may yield that measurement.
+            if current_section_name is not None:
+                yield MachOSection(
+                    name=current_section_name,
+                    size=current_section_size)
+            # Now that we've yielded the previous section, we update
+            # the current section name and reset its start address.
+            current_section_name = section
+            current_section_size = 0
+
+        elif size is not None:
+            # We're reading more sizes for our current section.
+            current_section_size += size
+
+        elif label is not None:
+            # We've encountered a new label. If we're currently measuring the
+            # span of a label, we may yield that measurement.
+            if (current_label_name is not None and
+                    current_label_start_address is not None and
+                    current_address is not None):
+                yield MachOLabel(
+                    name=current_label_name,
+                    start_address=current_label_start_address,
+                    end_address=current_address)
+            # Now that we've yielded the previous label, we update
+            # the current label name and reset its start address.
+            current_label_name = label
+            current_label_start_address = None
+
+    # Finally, we yield the last region we were parsing when we reached the
+    # end of the lines.
+    if (current_label_name is not None and
+            current_label_start_address is not None and
+            current_address is not None):
+        yield MachOLabel(
+            name=current_label_name,
+            start_address=current_label_start_address,
+            end_address=current_address)
+    if current_section_name is not None:
+        yield MachOSection(
+            name=current_section_name,
+            size=current_section_size)
diff --git a/utils/cmpcodesize/tests/fixtures/test_parser_test_fixture.txt b/utils/cmpcodesize/tests/fixtures/test_parser_test_fixture.txt
new file mode 100644
index 0000000000000..0c713fce08ea1
--- /dev/null
+++ b/utils/cmpcodesize/tests/fixtures/test_parser_test_fixture.txt
@@ -0,0 +1,357 @@
+Load command 0
+      cmd LC_SEGMENT_64
+  cmdsize 1192
+  segname __TEXT
+   vmaddr 0x0000000000000000
+   vmsize 0x00000000004ed000
+  fileoff 0
+ filesize 5165056
+  maxprot rwx
+ initprot r-x
+   nsects 14
+    flags (none)
+Section
+  sectname __text
+   segname __TEXT
+      addr 0x0000000000002a10
+      size 0x00000000004cdb75
+    offset 10768
+     align 2^4 (16)
+    reloff 0
+    nreloc 0
+      type S_REGULAR
+attributes PURE_INSTRUCTIONS SOME_INSTRUCTIONS
+ reserved1 0
+ reserved2 0
+Section
+  sectname __stubs
+   segname __TEXT
+      addr 0x00000000004d0586
+      size 0x00000000000004aa
+    offset 5047686
+     align 2^1 (2)
+    reloff 0
+    nreloc 0
+      type S_SYMBOL_STUBS
+attributes PURE_INSTRUCTIONS SOME_INSTRUCTIONS
+ reserved1 0 (index into indirect symbol table)
+ reserved2 6 (size of stubs)
+Section
+  sectname __bss
+   segname __DATA
+      addr 0x000000000051cb80
+      size 0x0000000000000768
+    offset 0
+     align 2^3 (8)
+    reloff 0
+    nreloc 0
+      type S_ZEROFILL
+attributes (none)
+ reserved1 0
+ reserved2 0
+Load command 2
+      cmd LC_SEGMENT_64
+  cmdsize 72
+  segname __LINKEDIT
+   vmaddr 0x000000000051e000
+   vmsize 0x0000000000380000
+  fileoff 5361664
+ filesize 3667824
+  maxprot rwx
+ initprot r--
+   nsects 0
+    flags (none)
+Load command 3
+          cmd LC_ID_DYLIB
+      cmdsize 56
+         name @rpath/libswiftCore.dylib (offset 24)
+   time stamp 1 Wed Dec 31 19:00:01 1969
+      current version 0.0.0
+compatibility version 0.0.0
+Load command 4
+            cmd LC_DYLD_INFO_ONLY
+        cmdsize 48
+     rebase_off 5361664
+    rebase_size 5592
+       bind_off 5367256
+      bind_size 1376
+  weak_bind_off 5368632
+ weak_bind_size 88
+  lazy_bind_off 5368720
+ lazy_bind_size 3792
+     export_off 5372512
+    export_size 411520
+Load command 5
+     cmd LC_SYMTAB
+ cmdsize 24
+  symoff 5805504
+   nsyms 74393
+  stroff 6997448
+ strsize 2032040
+Load command 6
+            cmd LC_DYSYMTAB
+        cmdsize 80
+      ilocalsym 0
+      nlocalsym 64672
+     iextdefsym 64672
+     nextdefsym 9541
+      iundefsym 74213
+      nundefsym 180
+         tocoff 0
+           ntoc 0
+      modtaboff 0
+        nmodtab 0
+   extrefsymoff 0
+    nextrefsyms 0
+ indirectsymoff 6995792
+  nindirectsyms 414
+      extreloff 0
+        nextrel 0
+      locreloff 0
+        nlocrel 0
+Load command 7
+     cmd LC_UUID
+ cmdsize 24
+    uuid 03B6D073-299B-3D21-A3E4-A8F78533DFC6
+Load command 8
+      cmd LC_VERSION_MIN_MACOSX
+  cmdsize 16
+  version 10.9
+      sdk 10.11
+Load command 9
+      cmd LC_SOURCE_VERSION
+  cmdsize 16
+  version 0.0
+Load command 10
+          cmd LC_LOAD_DYLIB
+      cmdsize 56
+         name /usr/lib/libobjc.A.dylib (offset 24)
+   time stamp 2 Wed Dec 31 19:00:02 1969
+      current version 228.0.0
+compatibility version 1.0.0
+Load command 11
+          cmd LC_LOAD_DYLIB
+      cmdsize 96
+         name /System/Library/Frameworks/Foundation.framework/Versions/C/Foundation (offset 24)
+   time stamp 2 Wed Dec 31 19:00:02 1969
+      current version 1256.1.0
+compatibility version 300.0.0
+Load command 12
+          cmd LC_LOAD_DYLIB
+      cmdsize 104
+         name /System/Library/Frameworks/CoreFoundation.framework/Versions/A/CoreFoundation (offset 24)
+   time stamp 2 Wed Dec 31 19:00:02 1969
+      current version 1256.14.0
+compatibility version 150.0.0
+Load command 13
+          cmd LC_LOAD_DYLIB
+      cmdsize 48
+         name /usr/lib/libc++.1.dylib (offset 24)
+   time stamp 2 Wed Dec 31 19:00:02 1969
+      current version 120.1.0
+compatibility version 1.0.0
+Load command 14
+          cmd LC_LOAD_DYLIB
+      cmdsize 56
+         name /usr/lib/libSystem.B.dylib (offset 24)
+   time stamp 2 Wed Dec 31 19:00:02 1969
+      current version 1226.10.1
+compatibility version 1.0.0
+Load command 15
+      cmd LC_FUNCTION_STARTS
+  cmdsize 16
+  dataoff 5784032
+ datasize 21472
+Load command 16
+      cmd LC_DATA_IN_CODE
+  cmdsize 16
+  dataoff 5805504
+ datasize 0
+(__TEXT,__text) section
+__TFs10minElementuRxs12__:
+0000000000002a10	pushq	%rbp
+0000000000002a11	movq	%rsp, %rbp
+0000000000002a14	pushq	%r14
+0000000000002a16	pushq	%rbx
+0000000000002a17	subq	$0x80, %rsp
+0000000000002a1e	movq	0x20(%rbp), %rax
+0000000000002a22	movq	0x10(%rbp), %rcx
+0000000000002a26	movq	%rdx, -0x18(%rbp)
+0000000000002a2a	movq	%r8, -0x20(%rbp)
+0000000000002a2e	movq	%rcx, -0x28(%rbp)
+0000000000002a32	movq	%rax, -0x30(%rbp)
+0000000000002a36	movq	%rsi, -0x38(%rbp)
+0000000000002a3a	movq	%rdi, -0x40(%rbp)
+0000000000002a3e	jmp	0x2a40
+0000000000002a40	jmp	0x2a42
+0000000000002a42	jmp	0x2a44
+0000000000002a44	jmp	0x2a46
+0000000000002a46	jmp	0x2a48
+0000000000002a48	jmp	0x2a4a
+0000000000002a4a	jmp	0x2a4c
+0000000000002a4c	leaq	0x4d463d(%rip), %rdi
+0000000000002a53	movl	$0x24, %eax
+0000000000002a58	movl	%eax, %esi
+0000000000002a5a	movl	$0x1, %edx
+0000000000002a5f	callq	0x2b90
+0000000000002a64	leaq	0x4d460a(%rip), %rdi
+0000000000002a6b	movl	$0xb, %r8d
+0000000000002a71	movl	%r8d, %esi
+0000000000002a74	movl	$0x2, %r8d
+0000000000002a7a	leaq	0x4d45af(%rip), %r9
+0000000000002a81	movl	$0x44, %r10d
+0000000000002a87	movl	%r10d, %r11d
+0000000000002a8a	movl	$0x15, %r10d
+0000000000002a90	movl	%r10d, %ebx
+0000000000002a93	movq	%rdx, -0x48(%rbp)
+0000000000002a97	movl	%r8d, %edx
+0000000000002a9a	movq	%rcx, -0x50(%rbp)
+0000000000002a9e	movq	%rax, %rcx
+0000000000002aa1	movq	-0x48(%rbp), %r8
+0000000000002aa5	movq	-0x50(%rbp), %rax
+0000000000002aa9	movq	%r9, -0x58(%rbp)
+0000000000002aad	movq	%rax, %r9
+0000000000002ab0	movq	-0x58(%rbp), %r14
+0000000000002ab4	movq	%r14, (%rsp)
+0000000000002ab8	movq	$0x44, 0x8(%rsp)
+0000000000002ac1	movl	$0x2, 0x10(%rsp)
+0000000000002ac9	movq	$0x15, 0x18(%rsp)
+0000000000002ad2	movq	%r11, -0x60(%rbp)
+0000000000002ad6	movq	%rbx, -0x68(%rbp)
+0000000000002ada	callq	0x84e00
+0000000000002adf	nop
+__TFSSCfT21_builtinString:
+0000000000002b90	pushq	%rbp
+0000000000002b91	movq	%rsp, %rbp
+0000000000002b94	pushq	%r15
+0000000000002b96	pushq	%r14
+0000000000002b98	pushq	%r13
+0000000000002b9a	pushq	%r12
+0000000000002b9c	pushq	%rbx
+0000000000002b9d	subq	$0x178, %rsp            ## imm = 0x178
+0000000000002ba4	movb	%dl, %al
+0000000000002ba6	movq	%rdi, -0x48(%rbp)
+0000000000002baa	movq	%rsi, -0x50(%rbp)
+0000000000002bae	movb	%al, %cl
+0000000000002bb0	andb	$0x1, %cl
+0000000000002bb3	movb	%cl, -0x58(%rbp)
+0000000000002bb6	testb	$0x1, %al
+0000000000002bb8	movq	%rsi, -0x78(%rbp)
+0000000000002bbc	movq	%rdi, -0x80(%rbp)
+0000000000002bc0	jne	0x2bc4
+0000000000002bc2	jmp	0x2c02
+0000000000002bc4	xorl	%eax, %eax
+0000000000002bc6	movl	%eax, %edx
+0000000000002bc8	xorl	%ecx, %ecx
+0000000000002bca	movq	$0x0, -0x70(%rbp)
+0000000000002bd2	movq	-0x70(%rbp), %r8
+0000000000002bd6	movq	-0x80(%rbp), %rdi
+0000000000002bda	movq	-0x78(%rbp), %rsi
+0000000000002bde	callq	0x2dbae0
+0000000000002be3	movq	%rax, %rdi
+0000000000002be6	movq	%rdx, %rsi
+0000000000002be9	movq	%rcx, %rdx
+0000000000002bec	callq	0x9c9c0
+0000000000002bf1	movq	%rax, -0x40(%rbp)
+0000000000002bf5	movq	%rdx, -0x38(%rbp)
+0000000000002bf9	movq	%rcx, -0x30(%rbp)
+0000000000002bfd	jmp	0x2dc6
+0000000000002c02	leaq	0x4f9d87(%rip), %rax
+0000000000002c09	addq	$0x8, %rax
+0000000000002c0d	movq	-0x80(%rbp), %rdi
+0000000000002c11	movq	-0x78(%rbp), %rsi
+0000000000002c15	movq	%rax, %rdx
+0000000000002c18	callq	0x2cb50
+0000000000002c1d	leaq	-0x68(%rbp), %rsi
+0000000000002c21	movq	%rax, -0x68(%rbp)
+0000000000002c25	movq	%rdx, -0x60(%rbp)
+0000000000002c29	movq	%rsi, -0x88(%rbp)
+0000000000002c30	callq	0x91d70
+0000000000002c35	movq	%rax, -0x90(%rbp)
+0000000000002c3c	callq	0x91dc0
+0000000000002c41	movq	%rax, -0x98(%rbp)
+0000000000002c48	callq	0x91e10
+0000000000002c4d	movq	%rax, -0xa0(%rbp)
+0000000000002c54	callq	0x91ec0
+0000000000002c59	leaq	0x501c88(%rip), %rdx
+0000000000002c60	addq	$0x8, %rdx
+0000000000002c64	leaq	0x4f48ad(%rip), %rcx
+0000000000002c6b	leaq	0x4f4d86(%rip), %r9
+0000000000002c72	leaq	0x4f9d17(%rip), %rsi
+0000000000002c79	addq	$0x8, %rsi
+0000000000002c7d	leaq	0x4f4afc(%rip), %rdi
+0000000000002c84	leaq	0x4fa64d(%rip), %r8
+0000000000002c8b	addq	$0x8, %r8
+0000000000002c8f	leaq	0x4f0d62(%rip), %r10
+0000000000002c96	leaq	0x4f0c03(%rip), %r11
+0000000000002c9d	leaq	0x4f0c64(%rip), %rbx
+0000000000002ca4	leaq	0x4f370d(%rip), %r14
+0000000000002cab	leaq	0x4f3736(%rip), %r15
+0000000000002cb2	leaq	0x4eec27(%rip), %r12
+0000000000002cb9	movq	%rdi, -0xa8(%rbp)
+0000000000002cc0	movq	%rdx, %rdi
+0000000000002cc3	movq	-0x88(%rbp), %r13
+0000000000002cca	movq	%rsi, -0xb0(%rbp)
+0000000000002cd1	movq	%r13, %rsi
+0000000000002cd4	movq	-0x90(%rbp), %r13
+0000000000002cdb	movq	%r8, -0xb8(%rbp)
+0000000000002ce2	movq	%r13, %r8
+0000000000002ce5	movq	-0xb0(%rbp), %r13
+0000000000002cec	movq	%r13, (%rsp)
+0000000000002cf0	movq	-0x98(%rbp), %r13
+0000000000002cf7	movq	%r13, 0x8(%rsp)
+0000000000002cfc	movq	-0xa8(%rbp), %r13
+0000000000002d03	movq	%r13, 0x10(%rsp)
+0000000000002d08	movq	-0xb8(%rbp), %r13
+0000000000002d0f	movq	%r13, 0x18(%rsp)
+0000000000002d14	movq	%r10, 0x20(%rsp)
+0000000000002d19	movq	%r13, 0x28(%rsp)
+0000000000002d1e	movq	%r11, 0x30(%rsp)
+0000000000002d23	movq	%r13, 0x38(%rsp)
+0000000000002d28	movq	%rbx, 0x40(%rsp)
+0000000000002d2d	movq	%r13, 0x48(%rsp)
+0000000000002d32	movq	-0xa0(%rbp), %r13
+0000000000002d39	movq	%r13, 0x50(%rsp)
+0000000000002d3e	movq	%r14, 0x58(%rsp)
+0000000000002d43	movq	%r15, 0x60(%rsp)
+0000000000002d48	movq	%rax, 0x68(%rsp)
+0000000000002d4d	movq	%r12, 0x70(%rsp)
+0000000000002d52	movq	-0xb0(%rbp), %rax
+0000000000002d59	movq	%rax, 0x78(%rsp)
+0000000000002d5e	movq	-0xb8(%rbp), %r14
+0000000000002d65	movq	%r14, 0x80(%rsp)
+0000000000002d6d	movq	%r10, 0x88(%rsp)
+0000000000002d75	movq	%r14, 0x90(%rsp)
+0000000000002d7d	movq	%r11, 0x98(%rsp)
+0000000000002d85	movq	%r14, 0xa0(%rsp)
+0000000000002d8d	movq	%rbx, 0xa8(%rsp)
+0000000000002d95	movq	%r14, 0xb0(%rsp)
+0000000000002d9d	movq	%r13, 0xb8(%rsp)
+0000000000002da5	movq	%rax, 0xc0(%rsp)
+0000000000002dad	movq	%rax, 0xc8(%rsp)
+0000000000002db5	callq	0x91780
+0000000000002dba	movq	%rax, -0x40(%rbp)
+0000000000002dbe	movq	%rdx, -0x38(%rbp)
+0000000000002dc2	movq	%rcx, -0x30(%rbp)
+0000000000002dc6	movq	-0x40(%rbp), %rax
+0000000000002dca	movq	-0x38(%rbp), %rdx
+0000000000002dce	movq	-0x30(%rbp), %rcx
+0000000000002dd2	movq	%rcx, %rdi
+0000000000002dd5	movq	%rax, -0xc0(%rbp)
+0000000000002ddc	movq	%rdx, -0xc8(%rbp)
+0000000000002de3	movq	%rcx, -0xd0(%rbp)
+0000000000002dea	callq	0x4a26a0
+0000000000002def	movq	-0x30(%rbp), %rdi
+0000000000002df3	callq	0x4a2710
+0000000000002df8	movq	-0xc0(%rbp), %rax
+0000000000002dff	movq	-0xc8(%rbp), %rdx
+0000000000002e06	movq	-0xd0(%rbp), %rcx
+0000000000002e0d	addq	$0x178, %rsp            ## imm = 0x178
+0000000000002e14	popq	%rbx
+0000000000002e15	popq	%r12
+0000000000002e17	popq	%r13
+0000000000002e19	popq	%r14
+0000000000002e1b	popq	%r15
+0000000000002e1d	popq	%rbp
+0000000000002e1e	retq
+0000000000002e1f	nop
\ No newline at end of file
diff --git a/utils/cmpcodesize/tests/test_parser.py b/utils/cmpcodesize/tests/test_parser.py
new file mode 100644
index 0000000000000..05dd01bb9f3bc
--- /dev/null
+++ b/utils/cmpcodesize/tests/test_parser.py
@@ -0,0 +1,32 @@
+import os
+import unittest
+
+from cmpcodesize.parser import parse
+
+
+class ParseTestCase(unittest.TestCase):
+    def setUp(self):
+        self.fixtures_dir = os.path.join(os.path.dirname(__file__), 'fixtures')
+
+    def test_fixture(self):
+        fixture = os.path.join(
+            self.fixtures_dir, 'test_parser_test_fixture.txt')
+        with open(fixture) as f:
+            regions = list(parse(f.read()))
+
+            self.assertEqual(regions[0].name, '__text')
+            self.assertEqual(regions[0].size, 5036917)
+
+            self.assertEqual(regions[1].name, '__stubs')
+            self.assertEqual(regions[1].size, 1194)
+
+            self.assertEqual(regions[2].name, '__TFs10minElementuRxs12__')
+            self.assertEqual(regions[2].start_address, 10768)
+            self.assertEqual(regions[2].end_address, 10975)
+
+            self.assertEqual(regions[3].name, '__TFSSCfT21_builtinString')
+            self.assertEqual(regions[3].start_address, 11152)
+            self.assertEqual(regions[3].end_address, 11807)
+
+            self.assertEqual(regions[4].name, '__bss')
+            self.assertEqual(regions[4].size, 1896)

From 23959b79713c3da2014368026e7c1f74212b4bed Mon Sep 17 00:00:00 2001
From: Brian Gesiak <modocache@gmail.com>
Date: Sun, 3 Jan 2016 20:51:09 +0000
Subject: [PATCH 11/12] output list sizes using csv

---
 utils/cmpcodesize/cmpcodesize/compare.py      | 20 +++++----
 utils/cmpcodesize/cmpcodesize/csvutils.py     | 28 ++++++++++++
 utils/cmpcodesize/cmpcodesize/main.py         | 16 +++++--
 utils/cmpcodesize/cmpcodesize/output.py       | 39 ++++++++++++++++
 utils/cmpcodesize/tests/test_csvutils.py      | 44 +++++++++++++++++++
 .../tests/test_list_function_sizes.py         | 18 ++++----
 utils/cmpcodesize/tests/test_output.py        | 38 ++++++++++++++++
 7 files changed, 183 insertions(+), 20 deletions(-)
 create mode 100644 utils/cmpcodesize/cmpcodesize/csvutils.py
 create mode 100644 utils/cmpcodesize/cmpcodesize/output.py
 create mode 100644 utils/cmpcodesize/tests/test_csvutils.py
 create mode 100644 utils/cmpcodesize/tests/test_output.py

diff --git a/utils/cmpcodesize/cmpcodesize/compare.py b/utils/cmpcodesize/cmpcodesize/compare.py
index 03049a27c2be2..55cc6fcee0aa6 100644
--- a/utils/cmpcodesize/cmpcodesize/compare.py
+++ b/utils/cmpcodesize/cmpcodesize/compare.py
@@ -15,7 +15,7 @@
 import collections
 from operator import itemgetter
 
-from cmpcodesize import otool, parser, regex
+from cmpcodesize import otool, output, parser, regex
 
 Prefixes = {
     # Cpp
@@ -153,11 +153,13 @@ def compareSizesOfFile(oldFiles, newFiles, allSections, listCategories):
         compareSizes(oldSizes, newSizes, "__bss", sectionTitle)
 
 
-def listFunctionSizes(sizeArray):
-    for pair in sorted(sizeArray, key=itemgetter(1)):
-        name = pair[0]
-        size = pair[1]
-        yield "%8d %s" % (size, name)
+def list_function_sizes(sizes):
+    """
+    Given a list of function name and size tuples, yield dicts for those sizes.
+    The dicts are sorted based on their size.
+    """
+    for pair in sorted(sizes, key=itemgetter(1)):
+        yield {'size': pair[1], 'name': pair[0]}
 
 
 def compareFunctionSizes(oldFiles, newFiles):
@@ -192,13 +194,15 @@ def compareFunctionSizes(oldFiles, newFiles):
 
     if onlyInFile1:
         print("Only in old file(s)")
-        print(listFunctionSizes(onlyInFile1))
+        output.print_listed_function_sizes(list_function_sizes(onlyInFile1),
+                                           output.Format.PLAINTEXT)
         print("Total size of functions only in old file: {}".format(onlyInFile1Size))
         print()
 
     if onlyInFile2:
         print("Only in new files(s)")
-        print(listFunctionSizes(onlyInFile2))
+        output.print_listed_function_sizes(list_function_sizes(onlyInFile2),
+                                           output.Format.PLAINTEXT)
         print("Total size of functions only in new file: {}".format(onlyInFile2Size))
         print()
 
diff --git a/utils/cmpcodesize/cmpcodesize/csvutils.py b/utils/cmpcodesize/cmpcodesize/csvutils.py
new file mode 100644
index 0000000000000..e7dfcb4cb229d
--- /dev/null
+++ b/utils/cmpcodesize/cmpcodesize/csvutils.py
@@ -0,0 +1,28 @@
+# cmpcodesize/csvutils.py - Transform dicts to CSV -*- python -*-
+#
+# This source file is part of the Swift.org open source project
+#
+# Copyright (c) 2014 - 2015 Apple Inc. and the Swift project authors
+# Licensed under Apache License v2.0 with Runtime Library Exception
+#
+# See http://swift.org/LICENSE.txt for license information
+# See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
+
+import csv
+
+
+def write_csv(dicts, out):
+    """
+    Write an iterable series of dicts, as CSV, to the specified 'out'.
+    The field names of the CSV are inferred from the first dict in the series.
+    If the series is empty, output nothing.
+    """
+    try:
+        first = next(dicts)
+    except StopIteration:
+        return
+
+    writer = csv.DictWriter(out, fieldnames=sorted(first.keys()))
+    writer.writeheader()
+    writer.writerow(first)
+    writer.writerows(dicts)
diff --git a/utils/cmpcodesize/cmpcodesize/main.py b/utils/cmpcodesize/cmpcodesize/main.py
index b2fb49c5f028b..6a685263f171d 100644
--- a/utils/cmpcodesize/cmpcodesize/main.py
+++ b/utils/cmpcodesize/cmpcodesize/main.py
@@ -17,8 +17,13 @@
 import glob
 import collections
 
-from cmpcodesize.compare import \
-    compareFunctionSizes, compareSizesOfFile, listFunctionSizes, readSizes
+from cmpcodesize.compare import (
+    compareFunctionSizes,
+    compareSizesOfFile,
+    readSizes,
+    list_function_sizes,
+)
+from cmpcodesize.output import Format, print_listed_function_sizes
 
 
 SHORTCUTS = {
@@ -99,6 +104,10 @@ def main():
                         action='store_true',
                         dest='sum_sizes',
                         default=False)
+    parser.add_argument('-f', '--format',
+                        help='How to format the output.',
+                        choices=[Format.PLAINTEXT, Format.CSV],
+                        default=Format.CSV)
 
     # Positional arguments.
     # These can be specified in means beyond what argparse supports,
@@ -177,7 +186,8 @@ def main():
             sizes = collections.defaultdict(int)
             for file in oldFiles:
                 readSizes(sizes, file, True, False)
-            print(os.linesep.join(listFunctionSizes(sizes.items())))
+            print_listed_function_sizes(list_function_sizes(sizes.items()),
+                                        parsed_arguments.format)
         else:
             compareFunctionSizes(oldFiles, newFiles)
     else:
diff --git a/utils/cmpcodesize/cmpcodesize/output.py b/utils/cmpcodesize/cmpcodesize/output.py
new file mode 100644
index 0000000000000..1b254d07dc10e
--- /dev/null
+++ b/utils/cmpcodesize/cmpcodesize/output.py
@@ -0,0 +1,39 @@
+# cmpcodesize/output.py - Printing options for cmpcodesize -*- python -*-
+#
+# This source file is part of the Swift.org open source project
+#
+# Copyright (c) 2014 - 2015 Apple Inc. and the Swift project authors
+# Licensed under Apache License v2.0 with Runtime Library Exception
+#
+# See http://swift.org/LICENSE.txt for license information
+# See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
+
+
+from __future__ import absolute_import, print_function
+
+import sys
+
+from . import csvutils
+
+
+class Format:
+    """
+    A class-based enum for output format options.
+    """
+    PLAINTEXT = 'plaintext'  # Plain text with no particular format.
+    CSV = 'csv'
+
+
+def print_listed_function_sizes(sizes, format_option, out=sys.stdout):
+    """
+    Prints the return value of cmpcodesize.compare.list_function_sizes to the
+    designated output, based on the given formatting option.
+    """
+    if format_option == Format.CSV:
+        csvutils.write_csv(sizes, out=out)
+    elif format_option == Format.PLAINTEXT:
+        for size in sizes:
+            print('{:>8} {}'.format(size['size'], size['name']), file=out)
+    else:
+        raise ValueError(
+            'Unrecognized format option: {}'.format(format_option))
diff --git a/utils/cmpcodesize/tests/test_csvutils.py b/utils/cmpcodesize/tests/test_csvutils.py
new file mode 100644
index 0000000000000..be44f41bf7d23
--- /dev/null
+++ b/utils/cmpcodesize/tests/test_csvutils.py
@@ -0,0 +1,44 @@
+# test_csvutils.py - Unit tests for cmpcodesize.csvutils -*- python -*-
+#
+# This source file is part of the Swift.org open source project
+#
+# Copyright (c) 2014 - 2015 Apple Inc. and the Swift project authors
+# Licensed under Apache License v2.0 with Runtime Library Exception
+#
+# See http://swift.org/LICENSE.txt for license information
+# See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
+
+import StringIO
+import unittest
+
+from cmpcodesize.csvutils import write_csv
+
+
+class WriteCsvTestCase(unittest.TestCase):
+    def setUp(self):
+        self.out = StringIO.StringIO()
+
+    def tearDown(self):
+        self.out.close()
+
+    def test_when_dicts_is_empty_writes_nothing(self):
+        write_csv(iter([]), out=self.out)
+        self.assertEqual(self.out.getvalue(), '')
+
+    def test_when_dicts_is_not_empty_writes_csv(self):
+        dicts = [
+            {'foo': 0, 'bar': 1, 'baz': 2},
+            {'foo': 3, 'bar': 4, 'baz': 5},
+            {'foo': 6, 'bar': 7, 'baz': 8},
+        ]
+        write_csv(iter(dicts), out=self.out)
+        self.assertEqual(self.out.getvalue().splitlines(), [
+            'bar,baz,foo',
+            '1,2,0',
+            '4,5,3',
+            '7,8,6',
+        ])
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/utils/cmpcodesize/tests/test_list_function_sizes.py b/utils/cmpcodesize/tests/test_list_function_sizes.py
index e0c8f2691873d..1380615bc51f4 100644
--- a/utils/cmpcodesize/tests/test_list_function_sizes.py
+++ b/utils/cmpcodesize/tests/test_list_function_sizes.py
@@ -1,4 +1,4 @@
-# test_list_function_sizes.py - Unit tests for listFunctionSizes -*- python -*-
+# test_list_function_sizes.py - Tests for list_function_sizes -*- python -*-
 #
 # This source file is part of the Swift.org open source project
 #
@@ -10,16 +10,16 @@
 
 import unittest
 
-from cmpcodesize.compare import listFunctionSizes
+from cmpcodesize.compare import list_function_sizes
 
 
 class ListFunctionSizesTestCase(unittest.TestCase):
     def test_when_sizes_is_none_raises(self):
         with self.assertRaises(TypeError):
-            list(listFunctionSizes(None))
+            list(list_function_sizes(None))
 
-    def test_when_sizes_is_empty_returns_none(self):
-        self.assertEqual(list(listFunctionSizes([])), [])
+    def test_when_sizes_is_empty_returns_empty(self):
+        self.assertEqual(list(list_function_sizes([])), [])
 
     def test_lists_each_entry(self):
         sizes = {
@@ -27,10 +27,10 @@ def test_lists_each_entry(self):
             'bar': 10,
             'baz': 100,
         }
-        self.assertEqual(list(listFunctionSizes(sizes.items())), [
-            '       1 foo',
-            '      10 bar',
-            '     100 baz',
+        self.assertEqual(list(list_function_sizes(sizes.items())), [
+            {'size': 1, 'name': 'foo'},
+            {'size': 10, 'name': 'bar'},
+            {'size': 100, 'name': 'baz'},
         ])
 
 if __name__ == '__main__':
diff --git a/utils/cmpcodesize/tests/test_output.py b/utils/cmpcodesize/tests/test_output.py
new file mode 100644
index 0000000000000..87ea440f4877d
--- /dev/null
+++ b/utils/cmpcodesize/tests/test_output.py
@@ -0,0 +1,38 @@
+# test_output.py - Unit tests for cmpcodesize.output -*- python -*-
+#
+# This source file is part of the Swift.org open source project
+#
+# Copyright (c) 2014 - 2015 Apple Inc. and the Swift project authors
+# Licensed under Apache License v2.0 with Runtime Library Exception
+#
+# See http://swift.org/LICENSE.txt for license information
+# See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
+
+import StringIO
+import unittest
+
+from cmpcodesize.output import Format, print_listed_function_sizes
+
+
+class PrintListedFunctionSizesTestCase(unittest.TestCase):
+    def setUp(self):
+        self.out = StringIO.StringIO()
+
+    def tearDown(self):
+        self.out.close()
+
+    def test_when_format_is_plaintext_outputs_plaintext(self):
+        sizes = [
+            {'size': 1, 'name': 'foo'},
+            {'size': 10, 'name': 'bar'},
+            {'size': 100, 'name': 'baz'},
+        ]
+        print_listed_function_sizes(sizes, Format.PLAINTEXT, out=self.out)
+        self.assertEqual(self.out.getvalue().splitlines(), [
+            '       1 foo',
+            '      10 bar',
+            '     100 baz',
+        ])
+
+if __name__ == '__main__':
+    unittest.main()

From 6751100dbd3af6043439ebe22e0c71f5450a1144 Mon Sep 17 00:00:00 2001
From: Brian Gesiak <modocache@gmail.com>
Date: Sun, 3 Jan 2016 23:03:22 +0000
Subject: [PATCH 12/12] just about to finish writing unit tests for csv output
 for compareFunctionSizes()

---
 utils/cmpcodesize/cmpcodesize/compare.py      | 88 +++++++++++--------
 utils/cmpcodesize/cmpcodesize/main.py         |  5 +-
 utils/cmpcodesize/cmpcodesize/output.py       | 66 +++++++++++++-
 ...list_function_sizes.py => test_compare.py} | 10 ++-
 utils/cmpcodesize/tests/test_output.py        | 83 ++++++++++++++++-
 5 files changed, 210 insertions(+), 42 deletions(-)
 rename utils/cmpcodesize/tests/{test_list_function_sizes.py => test_compare.py} (81%)

diff --git a/utils/cmpcodesize/cmpcodesize/compare.py b/utils/cmpcodesize/cmpcodesize/compare.py
index 55cc6fcee0aa6..d45f50fac628f 100644
--- a/utils/cmpcodesize/cmpcodesize/compare.py
+++ b/utils/cmpcodesize/cmpcodesize/compare.py
@@ -12,6 +12,7 @@
 
 import re
 import os
+import sys
 import collections
 from operator import itemgetter
 
@@ -162,21 +163,33 @@ def list_function_sizes(sizes):
         yield {'size': pair[1], 'name': pair[0]}
 
 
-def compareFunctionSizes(oldFiles, newFiles):
-    oldSizes = collections.defaultdict(int)
-    newSizes = collections.defaultdict(int)
-    for name in oldFiles:
-        readSizes(oldSizes, name, True, False)
-    for name in newFiles:
-        readSizes(newSizes, name, True, False)
-
+def output_compare_function_sizes(
+        oldSizes,
+        newSizes,
+        format_option,
+        first_only_out=sys.stdout,
+        second_only_out=sys.stdout,
+        both_out=sys.stdout):
+    """
+    Given two lists of functions and their sizes:
+
+    1. Output the size of each function that only exists in the first list,
+       as well as the total size of all functions that only exist in the first
+       list.
+    2. Same as (1), but for the second list.
+    3. For each function that exists in both lists, output its size in the
+       first, its size in the second, and the difference between the two.
+
+    The output for each of these three items may be redirected using the
+    'first_only_out', 'second_only_out', and 'both_out' parameters. By default,
+    these are directed to stdout.
+    """
     onlyInFile1 = []
     onlyInFile2 = []
     inBoth = []
 
     onlyInFile1Size = 0
     onlyInFile2Size = 0
-    inBothSize = 0
 
     for func, oldSize in oldSizes.items():
         newSize = newSizes[func]
@@ -193,36 +206,37 @@ def compareFunctionSizes(oldFiles, newFiles):
             onlyInFile2Size += newSize
 
     if onlyInFile1:
-        print("Only in old file(s)")
+        output.print_plaintext('Only in old file(s)',
+                               format_option, out=first_only_out)
         output.print_listed_function_sizes(list_function_sizes(onlyInFile1),
-                                           output.Format.PLAINTEXT)
-        print("Total size of functions only in old file: {}".format(onlyInFile1Size))
-        print()
+                                           format_option, out=first_only_out)
+        output.print_plaintext('Total size of functions only in old file: '
+                               '{}\n'.format(onlyInFile1Size),
+                               format_option, out=first_only_out)
 
     if onlyInFile2:
-        print("Only in new files(s)")
+        output.print_plaintext('Only in new files(s)',
+                               format_option, out=second_only_out)
         output.print_listed_function_sizes(list_function_sizes(onlyInFile2),
-                                           output.Format.PLAINTEXT)
-        print("Total size of functions only in new file: {}".format(onlyInFile2Size))
-        print()
-
+                                           format_option, out=second_only_out)
+        output.print_plaintext('Total size of functions only in new file: '
+                               '{}\n'.format(onlyInFile2Size),
+                               format_option, out=second_only_out)
     if inBoth:
-        sizeIncrease = 0
-        sizeDecrease = 0
-        print("%8s %8s %8s" % ("old", "new", "diff"))
-        for triple in sorted(inBoth, key=lambda tup: (tup[2] - tup[1], tup[1])):
-            func = triple[0]
-            oldSize = triple[1]
-            newSize = triple[2]
-            diff = newSize - oldSize
-            if diff > 0:
-                sizeIncrease += diff
-            else:
-                sizeDecrease -= diff
-            if diff == 0:
-                inBothSize += newSize
-            print("%8d %8d %8d %s" %(oldSize, newSize, newSize - oldSize, func))
-        print("Total size of functions with the same size in both files: {}".format(inBothSize))
-        print("Total size of functions that got smaller: {}".format(sizeDecrease))
-        print("Total size of functions that got bigger: {}".format(sizeIncrease))
-        print("Total size change of functions present in both files: {}".format(sizeIncrease - sizeDecrease))
+        output.print_compare_function_sizes(inBoth,
+                                            format_option, out=both_out)
+
+
+def compareFunctionSizes(oldFiles, newFiles):
+    """
+    Given two lists of Mach-O files, return a tuple of dicts; the first element
+    is a dictionary of functions in the first list and their sizes, and the
+    second element is a dictionary for the second list.
+    """
+    oldSizes = collections.defaultdict(int)
+    newSizes = collections.defaultdict(int)
+    for name in oldFiles:
+        readSizes(oldSizes, name, True, False)
+    for name in newFiles:
+        readSizes(newSizes, name, True, False)
+    return (oldSizes, newSizes)
diff --git a/utils/cmpcodesize/cmpcodesize/main.py b/utils/cmpcodesize/cmpcodesize/main.py
index 6a685263f171d..be47588de0bd9 100644
--- a/utils/cmpcodesize/cmpcodesize/main.py
+++ b/utils/cmpcodesize/cmpcodesize/main.py
@@ -22,6 +22,7 @@
     compareSizesOfFile,
     readSizes,
     list_function_sizes,
+    output_compare_function_sizes,
 )
 from cmpcodesize.output import Format, print_listed_function_sizes
 
@@ -189,7 +190,9 @@ def main():
             print_listed_function_sizes(list_function_sizes(sizes.items()),
                                         parsed_arguments.format)
         else:
-            compareFunctionSizes(oldFiles, newFiles)
+            old_sizes, new_sizes = compareFunctionSizes(oldFiles, newFiles)
+            output_compare_function_sizes(
+                old_sizes, new_sizes, parsed_arguments.format)
     else:
         print("%-26s%16s  %8s  %8s  %s" % ("", "Section", "Old", "New", "Percent"))
         if parsed_arguments.sum_sizes:
diff --git a/utils/cmpcodesize/cmpcodesize/output.py b/utils/cmpcodesize/cmpcodesize/output.py
index 1b254d07dc10e..c6ac6d26cf8e1 100644
--- a/utils/cmpcodesize/cmpcodesize/output.py
+++ b/utils/cmpcodesize/cmpcodesize/output.py
@@ -24,6 +24,24 @@ class Format:
     CSV = 'csv'
 
 
+def _raise_unrecognized_format(format_option):
+    """
+    Raise an exception indicating the format option string provided
+    is not valid.
+    """
+    raise ValueError('Unrecognized format option: {}'.format(format_option))
+
+
+def print_plaintext(message, format_option, out=sys.stdout):
+    """Print iff the format is plaintext."""
+    if format_option == Format.CSV:
+        pass
+    elif format_option == Format.PLAINTEXT:
+        print(message, file=out)
+    else:
+        _raise_unrecognized_format(format_option)
+
+
 def print_listed_function_sizes(sizes, format_option, out=sys.stdout):
     """
     Prints the return value of cmpcodesize.compare.list_function_sizes to the
@@ -35,5 +53,49 @@ def print_listed_function_sizes(sizes, format_option, out=sys.stdout):
         for size in sizes:
             print('{:>8} {}'.format(size['size'], size['name']), file=out)
     else:
-        raise ValueError(
-            'Unrecognized format option: {}'.format(format_option))
+        _raise_unrecognized_format(format_option)
+
+
+def print_compare_function_sizes(inBoth, format_option, out=sys.stdout):
+    """
+    TODO, inBoth is an array of triples: function name, old size, new size.
+    """
+    if format_option == Format.PLAINTEXT:
+        sizeIncrease = 0
+        sizeDecrease = 0
+        inBothSize = 0
+        print('%8s %8s %8s' % ('old', 'new', 'diff'), file=out)
+        for triple in sorted(inBoth, key=lambda tup: (tup[2] - tup[1], tup[1])):
+            func = triple[0]
+            oldSize = triple[1]
+            newSize = triple[2]
+            diff = newSize - oldSize
+            if diff > 0:
+                sizeIncrease += diff
+            else:
+                sizeDecrease -= diff
+            if diff == 0:
+                inBothSize += newSize
+            print('%8d %8d %8d %s' % (oldSize, newSize, newSize - oldSize, func),
+                  file=out)
+
+        print('Total size of functions with the same size in both '
+              'files: {}'.format(inBothSize), file=out)
+        print('Total size of functions that got smaller: '
+              '{}'.format(sizeDecrease), file=out)
+        print('Total size of functions that got bigger: '
+              '{}'.format(sizeIncrease), file=out)
+        print('Total size change of functions present in both files: '
+              '{}'.format(sizeIncrease - sizeDecrease), file=out)
+    elif format_option == Format.CSV:
+        diffs = []
+        for diff in inBoth:
+            diffs.append({
+                'name': diff[0],
+                'old': diff[1],
+                'new': diff[2],
+                'diff': diff[2] - diff[1],
+            })
+        csvutils.write_csv(iter(diffs), out)
+    else:
+        _raise_unrecognized_format(format_option)
diff --git a/utils/cmpcodesize/tests/test_list_function_sizes.py b/utils/cmpcodesize/tests/test_compare.py
similarity index 81%
rename from utils/cmpcodesize/tests/test_list_function_sizes.py
rename to utils/cmpcodesize/tests/test_compare.py
index 1380615bc51f4..de9a2ade06fd7 100644
--- a/utils/cmpcodesize/tests/test_list_function_sizes.py
+++ b/utils/cmpcodesize/tests/test_compare.py
@@ -1,4 +1,4 @@
-# test_list_function_sizes.py - Tests for list_function_sizes -*- python -*-
+# test_compare.py - Unit tests for cmpcodesize.compare -*- python -*-
 #
 # This source file is part of the Swift.org open source project
 #
@@ -33,5 +33,13 @@ def test_lists_each_entry(self):
             {'size': 100, 'name': 'baz'},
         ])
 
+
+class OutputCompareFunctionSizesTestCase(unittest.TestCase):
+    def test_when_format_is_plaintext_(self):
+        pass
+
+    def test_when_format_is_csv_(self):
+        pass
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/utils/cmpcodesize/tests/test_output.py b/utils/cmpcodesize/tests/test_output.py
index 87ea440f4877d..28385a3f6f649 100644
--- a/utils/cmpcodesize/tests/test_output.py
+++ b/utils/cmpcodesize/tests/test_output.py
@@ -11,7 +11,72 @@
 import StringIO
 import unittest
 
-from cmpcodesize.output import Format, print_listed_function_sizes
+from cmpcodesize.output import (
+    Format,
+    print_compare_function_sizes,
+    print_listed_function_sizes,
+    print_plaintext,
+)
+
+
+class PrintCompareFunctionSizesTestCase(unittest.TestCase):
+    def setUp(self):
+        self.out = StringIO.StringIO()
+
+    def tearDown(self):
+        self.out.close()
+
+    def test_when_format_is_plaintext_but_input_is_none_raises(self):
+        with self.assertRaises(TypeError):
+            print_compare_function_sizes(None, Format.PLAINTEXT, out=self.out)
+
+    def test_when_format_is_plaintext_but_input_is_empty_prints_zeros(self):
+        print_compare_function_sizes([], Format.PLAINTEXT, out=self.out)
+        self.assertEqual(self.out.getvalue().splitlines(), [
+            '     old      new     diff',
+            'Total size of functions with the same size in both files: 0',
+            'Total size of functions that got smaller: 0',
+            'Total size of functions that got bigger: 0',
+            'Total size change of functions present in both files: 0',
+        ])
+
+    def test_when_format_is_plaintext_and_input_is_not_empty_prints(self):
+        print_compare_function_sizes([
+            ('foo', 4, 10),
+            ('bar', 10, 4),
+            ('baz', 4, 4),
+        ], Format.PLAINTEXT, out=self.out)
+        self.assertEqual(self.out.getvalue().splitlines(), [
+            '     old      new     diff',
+            '      10        4       -6 bar',
+            '       4        4        0 baz',
+            '       4       10        6 foo',
+            'Total size of functions with the same size in both files: 4',
+            'Total size of functions that got smaller: 6',
+            'Total size of functions that got bigger: 6',
+            'Total size change of functions present in both files: 0',
+        ])
+
+    def test_when_format_is_csv_but_input_is_none_raises(self):
+        with self.assertRaises(TypeError):
+            print_compare_function_sizes(None, Format.CSV, out=self.out)
+
+    def test_when_format_is_csv_but_input_is_empty_does_not_print(self):
+        print_compare_function_sizes([], Format.CSV, out=self.out)
+        self.assertEqual(self.out.getvalue(), '')
+
+    def test_when_format_is_csv_and_input_is_not_empty_prints(self):
+        print_compare_function_sizes([
+            ('foo', 4, 10),
+            ('bar', 10, 4),
+            ('baz', 4, 4),
+        ], Format.CSV, out=self.out)
+        self.assertEqual(self.out.getvalue().splitlines(), [
+            'diff,name,new,old',
+            '6,foo,10,4',
+            '-6,bar,4,10',
+            '0,baz,4,4'
+        ])
 
 
 class PrintListedFunctionSizesTestCase(unittest.TestCase):
@@ -34,5 +99,21 @@ def test_when_format_is_plaintext_outputs_plaintext(self):
             '     100 baz',
         ])
 
+
+class PrintPlaintextTestCase(unittest.TestCase):
+    def setUp(self):
+        self.out = StringIO.StringIO()
+
+    def tearDown(self):
+        self.out.close()
+
+    def test_when_format_is_plaintext_prints(self):
+        print_plaintext('foo', Format.PLAINTEXT, self.out)
+        self.assertEqual(self.out.getvalue(), 'foo\n')
+
+    def test_when_format_is_not_plaintext_does_not_print(self):
+        print_plaintext('bar', Format.CSV, self.out)
+        self.assertEqual(self.out.getvalue(), '')
+
 if __name__ == '__main__':
     unittest.main()