Add some data.

mebjas · Jun 15, 2024 · 0dcc8c7 · 0dcc8c7
1 parent f237e2d
commit 0dcc8c7
Show file tree

Hide file tree

Showing 40 changed files with 1,834 additions and 0 deletions.
diff --git a/_data/model-performance/1--conv2d-104202params/f16.txt b/_data/model-performance/1--conv2d-104202params/f16.txt
@@ -0,0 +1,82 @@
+INFO: STARTING!                                        
+INFO: Log parameter values verbosely: [0]
+INFO: Min num runs: [50]
+INFO: Num threads: [1]
+INFO: Min warmup runs: [1]
+INFO: Graph: [fp16.tflite]
+INFO: Signature to run: []
+INFO: Enable op profiling: [1]
+INFO: #threads used for CPU inference: [1]
+INFO: Loaded model fp16.tflite
+INFO: Initialized TensorFlow Lite runtime.
+INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
+VERBOSE: Replacing 16 out of 16 node(s) with delegate (TfLiteXNNPackDelegate) node, yielding 1 partitions for the whole graph.
+INFO: The input model file size (MB): 0.212812
+INFO: Initialized session in 7.083ms.
+INFO: Running benchmark for at least 1 iterations and at least 0.5 seconds but terminate if exceeding 150 seconds.
+INFO: count=4089 first=533 curr=114 min=111 max=982 avg=122.066 std=40
+
+INFO: Running benchmark for at least 50 iterations and at least 1 seconds but terminate if exceeding 150 seconds.
+INFO: count=8446 first=194 curr=115 min=114 max=207 avg=116.047 std=5
+
+INFO: Inference timings in us: Init: 7083, First inference: 533, Warmup (avg): 122.066, Inference (avg): 116.047
+INFO: Note: as the benchmark tool itself affects memory footprint, the following is only APPROXIMATE to the actual memory footprint of the model at runtime. Take the information at your discretion.
+INFO: Memory footprint delta from the start of the tool (MB): init=4.32422 overall=4.32422
+INFO: Profiling Info for Benchmark Initialization:
+============================== Run Order ==============================
+	                             [node type]	  [first]	 [avg ms]	     [%]	  [cdf%]	  [mem KB]	[times called]	[Name]
+	                 ModifyGraphWithDelegate	    2.646	    2.646	 86.584%	 86.584%	  1472.000	        1	ModifyGraphWithDelegate/0
+	                         AllocateTensors	    0.410	    0.410	 13.416%	100.000%	   236.000	        1	AllocateTensors/0
+
+============================== Top by Computation Time ==============================
+	                             [node type]	  [first]	 [avg ms]	     [%]	  [cdf%]	  [mem KB]	[times called]	[Name]
+	                 ModifyGraphWithDelegate	    2.646	    2.646	 86.584%	 86.584%	  1472.000	        1	ModifyGraphWithDelegate/0
+	                         AllocateTensors	    0.410	    0.410	 13.416%	100.000%	   236.000	        1	AllocateTensors/0
+
+Number of nodes executed: 2
+============================== Summary by node type ==============================
+	                             [Node type]	  [count]	  [avg ms]	    [avg %]	    [cdf %]	  [mem KB]	[times called]
+	                 ModifyGraphWithDelegate	        1	     2.646	    86.584%	    86.584%	  1472.000	        1
+	                         AllocateTensors	        1	     0.410	    13.416%	   100.000%	   236.000	        1
+
+Timings (microseconds): count=1 curr=3056
+Memory (bytes): count=0
+2 nodes observed
+
+
+
+INFO: Operator-wise Profiling Info for Regular Benchmark Runs:
+============================== Run Order ==============================
+	                             [node type]	  [first]	 [avg ms]	     [%]	  [cdf%]	  [mem KB]	[times called]	[Name]
+	           Convolution (NHWC, F32) IGEMM	    0.023	    0.017	 15.537%	 15.537%	     0.000	        1	Delegate/Convolution (NHWC, F32) IGEMM:0
+	                 Max Pooling (NHWC, F32)	    0.007	    0.003	  2.735%	 18.272%	     0.000	        1	Delegate/Max Pooling (NHWC, F32):1
+	           Convolution (NHWC, F32) IGEMM	    0.072	    0.065	 59.398%	 77.670%	     0.000	        1	Delegate/Convolution (NHWC, F32) IGEMM:2
+	                 Max Pooling (NHWC, F32)	    0.001	    0.000	  0.078%	 77.748%	     0.000	        1	Delegate/Max Pooling (NHWC, F32):3
+	           Convolution (NHWC, F32) IGEMM	    0.038	    0.024	 21.332%	 99.079%	     0.000	        1	Delegate/Convolution (NHWC, F32) IGEMM:4
+	                          Copy (NC, X32)	    0.000	    0.000	  0.000%	 99.079%	     0.000	        1	Delegate/Copy (NC, X32):5
+	          Fully Connected (NC, F32) GEMM	    0.006	    0.001	  0.921%	100.000%	     0.000	        1	Delegate/Fully Connected (NC, F32) GEMM:6
+	                       Softmax (NC, F32)	    0.000	    0.000	  0.000%	100.000%	     0.000	        1	Delegate/Softmax (NC, F32):7
+
+============================== Top by Computation Time ==============================
+	                             [node type]	  [first]	 [avg ms]	     [%]	  [cdf%]	  [mem KB]	[times called]	[Name]
+	           Convolution (NHWC, F32) IGEMM	    0.072	    0.065	 59.398%	 59.398%	     0.000	        1	Delegate/Convolution (NHWC, F32) IGEMM:2
+	           Convolution (NHWC, F32) IGEMM	    0.038	    0.024	 21.332%	 80.729%	     0.000	        1	Delegate/Convolution (NHWC, F32) IGEMM:4
+	           Convolution (NHWC, F32) IGEMM	    0.023	    0.017	 15.537%	 96.266%	     0.000	        1	Delegate/Convolution (NHWC, F32) IGEMM:0
+	                 Max Pooling (NHWC, F32)	    0.007	    0.003	  2.735%	 99.001%	     0.000	        1	Delegate/Max Pooling (NHWC, F32):1
+	          Fully Connected (NC, F32) GEMM	    0.006	    0.001	  0.921%	 99.922%	     0.000	        1	Delegate/Fully Connected (NC, F32) GEMM:6
+	                 Max Pooling (NHWC, F32)	    0.001	    0.000	  0.078%	100.000%	     0.000	        1	Delegate/Max Pooling (NHWC, F32):3
+	                          Copy (NC, X32)	    0.000	    0.000	  0.000%	100.000%	     0.000	        1	Delegate/Copy (NC, X32):5
+	                       Softmax (NC, F32)	    0.000	    0.000	  0.000%	100.000%	     0.000	        1	Delegate/Softmax (NC, F32):7
+
+Number of nodes executed: 8
+============================== Summary by node type ==============================
+	                             [Node type]	  [count]	  [avg ms]	    [avg %]	    [cdf %]	  [mem KB]	[times called]
+	           Convolution (NHWC, F32) IGEMM	        3	     0.105	    96.330%	    96.330%	     0.000	        3
+	                 Max Pooling (NHWC, F32)	        2	     0.003	     2.752%	    99.083%	     0.000	        2
+	          Fully Connected (NC, F32) GEMM	        1	     0.001	     0.917%	   100.000%	     0.000	        1
+	                       Softmax (NC, F32)	        1	     0.000	     0.000%	   100.000%	     0.000	        1
+	                          Copy (NC, X32)	        1	     0.000	     0.000%	   100.000%	     0.000	        1
+
+Timings (microseconds): count=8446 first=147 curr=109 min=109 max=199 avg=110.266 std=5
+Memory (bytes): count=0
+8 nodes observed
diff --git a/_data/model-performance/1--conv2d-104202params/f32.tflite b/_data/model-performance/1--conv2d-104202params/f32.tflite
diff --git a/_data/model-performance/1--conv2d-104202params/f32.txt b/_data/model-performance/1--conv2d-104202params/f32.txt
@@ -0,0 +1,83 @@
+INFO: STARTING!
+INFO: Log parameter values verbosely: [0]
+INFO: Min num runs: [50]
+INFO: Num threads: [1]
+INFO: Min warmup runs: [1]
+INFO: Graph: [f32.tflite]
+INFO: Signature to run: []
+INFO: Enable op profiling: [1]
+INFO: #threads used for CPU inference: [1]
+INFO: Loaded model f32.tflite
+INFO: Initialized TensorFlow Lite runtime.
+INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
+VERBOSE: Replacing 8 out of 8 node(s) with delegate (TfLiteXNNPackDelegate) node, yielding 1 partitions for the whole graph.
+INFO: The input model file size (MB): 0.420228
+INFO: Initialized session in 4.369ms.
+INFO: Running benchmark for at least 1 iterations and at least 0.5 seconds but terminate if exceeding 150 seconds.
+INFO: count=3961 first=676 curr=114 min=111 max=2812 avg=126.005 std=69
+
+INFO: Running benchmark for at least 50 iterations and at least 1 seconds but terminate if exceeding 150 seconds.
+INFO: count=8338 first=145 curr=120 min=114 max=436 avg=117.546 std=7
+
+INFO: Inference timings in us: Init: 4369, First inference: 676, Warmup (avg): 126.005, Inference (avg): 117.546
+INFO: Note: as the benchmark tool itself affects memory footprint, the following is only APPROXIMATE to the actual memory footprint of the model at runtime. Take the information at your discretion.
+INFO: Memory footprint delta from the start of the tool (MB): init=4.09766 overall=4.09766
+INFO: Profiling Info for Benchmark Initialization:
+============================== Run Order ==============================
+	                             [node type]	  [first]	 [avg ms]	     [%]	  [cdf%]	  [mem KB]	[times called]	[Name]
+	                 ModifyGraphWithDelegate	    1.186	    1.186	 79.437%	 79.437%	  1352.000	        1	ModifyGraphWithDelegate/0
+	                         AllocateTensors	    0.307	    0.307	 20.563%	100.000%	   244.000	        1	AllocateTensors/0
+
+============================== Top by Computation Time ==============================
+	                             [node type]	  [first]	 [avg ms]	     [%]	  [cdf%]	  [mem KB]	[times called]	[Name]
+	                 ModifyGraphWithDelegate	    1.186	    1.186	 79.437%	 79.437%	  1352.000	        1	ModifyGraphWithDelegate/0
+	                         AllocateTensors	    0.307	    0.307	 20.563%	100.000%	   244.000	        1	AllocateTensors/0
+
+Number of nodes executed: 2
+============================== Summary by node type ==============================
+	                             [Node type]	  [count]	  [avg ms]	    [avg %]	    [cdf %]	  [mem KB]	[times called]
+	                 ModifyGraphWithDelegate	        1	     1.186	    79.437%	    79.437%	  1352.000	        1
+	                         AllocateTensors	        1	     0.307	    20.563%	   100.000%	   244.000	        1
+
+Timings (microseconds): count=1 curr=1493
+Memory (bytes): count=0
+2 nodes observed
+
+
+
+INFO: Operator-wise Profiling Info for Regular Benchmark Runs:
+============================== Run Order ==============================
+	                             [node type]	  [first]	 [avg ms]	     [%]	  [cdf%]	  [mem KB]	[times called]	[Name]
+	           Convolution (NHWC, F32) IGEMM	    0.019	    0.017	 15.398%	 15.398%	     0.000	        1	Delegate/Convolution (NHWC, F32) IGEMM:0
+	                 Max Pooling (NHWC, F32)	    0.003	    0.003	  2.700%	 18.098%	     0.000	        1	Delegate/Max Pooling (NHWC, F32):1
+	           Convolution (NHWC, F32) IGEMM	    0.066	    0.066	 59.220%	 77.319%	     0.000	        1	Delegate/Convolution (NHWC, F32) IGEMM:2
+	                 Max Pooling (NHWC, F32)	    0.001	    0.000	  0.283%	 77.602%	     0.000	        1	Delegate/Max Pooling (NHWC, F32):3
+	           Convolution (NHWC, F32) IGEMM	    0.027	    0.024	 21.480%	 99.082%	     0.000	        1	Delegate/Convolution (NHWC, F32) IGEMM:4
+	                          Copy (NC, X32)	    0.000	    0.000	  0.000%	 99.082%	     0.000	        1	Delegate/Copy (NC, X32):5
+	          Fully Connected (NC, F32) GEMM	    0.002	    0.001	  0.916%	 99.999%	     0.000	        1	Delegate/Fully Connected (NC, F32) GEMM:6
+	                       Softmax (NC, F32)	    0.000	    0.000	  0.002%	100.000%	     0.000	        1	Delegate/Softmax (NC, F32):7
+
+============================== Top by Computation Time ==============================
+	                             [node type]	  [first]	 [avg ms]	     [%]	  [cdf%]	  [mem KB]	[times called]	[Name]
+	           Convolution (NHWC, F32) IGEMM	    0.066	    0.066	 59.220%	 59.220%	     0.000	        1	Delegate/Convolution (NHWC, F32) IGEMM:2
+	           Convolution (NHWC, F32) IGEMM	    0.027	    0.024	 21.480%	 80.701%	     0.000	        1	Delegate/Convolution (NHWC, F32) IGEMM:4
+	           Convolution (NHWC, F32) IGEMM	    0.019	    0.017	 15.398%	 96.099%	     0.000	        1	Delegate/Convolution (NHWC, F32) IGEMM:0
+	                 Max Pooling (NHWC, F32)	    0.003	    0.003	  2.700%	 98.799%	     0.000	        1	Delegate/Max Pooling (NHWC, F32):1
+	          Fully Connected (NC, F32) GEMM	    0.002	    0.001	  0.916%	 99.715%	     0.000	        1	Delegate/Fully Connected (NC, F32) GEMM:6
+	                 Max Pooling (NHWC, F32)	    0.001	    0.000	  0.283%	 99.998%	     0.000	        1	Delegate/Max Pooling (NHWC, F32):3
+	                       Softmax (NC, F32)	    0.000	    0.000	  0.002%	100.000%	     0.000	        1	Delegate/Softmax (NC, F32):7
+	                          Copy (NC, X32)	    0.000	    0.000	  0.000%	100.000%	     0.000	        1	Delegate/Copy (NC, X32):5
+
+Number of nodes executed: 8
+============================== Summary by node type ==============================
+	                             [Node type]	  [count]	  [avg ms]	    [avg %]	    [cdf %]	  [mem KB]	[times called]
+	           Convolution (NHWC, F32) IGEMM	        3	     0.107	    96.396%	    96.396%	     0.000	        3
+	                 Max Pooling (NHWC, F32)	        2	     0.003	     2.703%	    99.099%	     0.000	        2
+	          Fully Connected (NC, F32) GEMM	        1	     0.001	     0.901%	   100.000%	     0.000	        1
+	                       Softmax (NC, F32)	        1	     0.000	     0.000%	   100.000%	     0.000	        1
+	                          Copy (NC, X32)	        1	     0.000	     0.000%	   100.000%	     0.000	        1
+
+Timings (microseconds): count=8338 first=118 curr=114 min=109 max=408 avg=111.813 std=7
+Memory (bytes): count=0
+8 nodes observed
+
diff --git a/_data/model-performance/1--conv2d-104202params/fp16.tflite b/_data/model-performance/1--conv2d-104202params/fp16.tflite
diff --git a/_data/model-performance/1--conv2d-104202params/u8.tflite b/_data/model-performance/1--conv2d-104202params/u8.tflite
diff --git a/_data/model-performance/1--conv2d-104202params/u8.txt b/_data/model-performance/1--conv2d-104202params/u8.txt
@@ -0,0 +1,87 @@
+INFO: STARTING!
+INFO: Log parameter values verbosely: [0]
+INFO: Min num runs: [50]
+INFO: Num threads: [1]
+INFO: Min warmup runs: [1]
+INFO: Graph: [u8.tflite]
+INFO: Signature to run: []
+INFO: Enable op profiling: [1]
+INFO: #threads used for CPU inference: [1]
+INFO: Loaded model u8.tflite
+INFO: Initialized TensorFlow Lite runtime.
+INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
+VERBOSE: Replacing 7 out of 10 node(s) with delegate (TfLiteXNNPackDelegate) node, yielding 3 partitions for the whole graph.
+INFO: The input model file size (MB): 0.114776
+INFO: Initialized session in 4.042ms.
+INFO: Running benchmark for at least 1 iterations and at least 0.5 seconds but terminate if exceeding 150 seconds.
+INFO: count=10832 first=215 curr=43 min=42 max=282 avg=46.0076 std=12
+
+INFO: Running benchmark for at least 50 iterations and at least 1 seconds but terminate if exceeding 150 seconds.
+INFO: count=20748 first=80 curr=44 min=43 max=80 avg=44.4094 std=2
+
+INFO: Inference timings in us: Init: 4042, First inference: 215, Warmup (avg): 46.0076, Inference (avg): 44.4094
+INFO: Note: as the benchmark tool itself affects memory footprint, the following is only APPROXIMATE to the actual memory footprint of the model at runtime. Take the information at your discretion.
+INFO: Memory footprint delta from the start of the tool (MB): init=3.19531 overall=3.52344
+INFO: Profiling Info for Benchmark Initialization:
+============================== Run Order ==============================
+	                             [node type]	  [first]	 [avg ms]	     [%]	  [cdf%]	  [mem KB]	[times called]	[Name]
+	                 ModifyGraphWithDelegate	    0.816	    0.816	 61.446%	 61.446%	   652.000	        1	ModifyGraphWithDelegate/0
+	                         AllocateTensors	    0.512	    0.512	 38.554%	100.000%	     0.000	        1	AllocateTensors/0
+
+============================== Top by Computation Time ==============================
+	                             [node type]	  [first]	 [avg ms]	     [%]	  [cdf%]	  [mem KB]	[times called]	[Name]
+	                 ModifyGraphWithDelegate	    0.816	    0.816	 61.446%	 61.446%	   652.000	        1	ModifyGraphWithDelegate/0
+	                         AllocateTensors	    0.512	    0.512	 38.554%	100.000%	     0.000	        1	AllocateTensors/0
+
+Number of nodes executed: 2
+============================== Summary by node type ==============================
+	                             [Node type]	  [count]	  [avg ms]	    [avg %]	    [cdf %]	  [mem KB]	[times called]
+	                 ModifyGraphWithDelegate	        1	     0.816	    61.446%	    61.446%	   652.000	        1
+	                         AllocateTensors	        1	     0.512	    38.554%	   100.000%	     0.000	        1
+
+Timings (microseconds): count=1 curr=1328
+Memory (bytes): count=0
+2 nodes observed
+
+
+
+INFO: Operator-wise Profiling Info for Regular Benchmark Runs:
+============================== Run Order ==============================
+	                             [node type]	  [first]	 [avg ms]	     [%]	  [cdf%]	  [mem KB]	[times called]	[Name]
+	                                QUANTIZE	    0.002	    0.000	  1.026%	  1.026%	     0.000	        1	[tfl.quantize]:0
+	           Convolution (NHWC, QC8) IGEMM	    0.016	    0.014	 35.477%	 36.503%	     0.000	        1	Delegate/Convolution (NHWC, QC8) IGEMM:0
+	                  Max Pooling (NHWC, S8)	    0.001	    0.001	  2.533%	 39.037%	     0.000	        1	Delegate/Max Pooling (NHWC, S8):1
+	           Convolution (NHWC, QC8) IGEMM	    0.020	    0.018	 44.727%	 83.764%	     0.000	        1	Delegate/Convolution (NHWC, QC8) IGEMM:2
+	                  Max Pooling (NHWC, S8)	    0.000	    0.000	  0.004%	 83.767%	     0.000	        1	Delegate/Max Pooling (NHWC, S8):3
+	           Convolution (NHWC, QC8) IGEMM	    0.008	    0.006	 15.236%	 99.004%	     0.000	        1	Delegate/Convolution (NHWC, QC8) IGEMM:4
+	                           Copy (NC, X8)	    0.000	    0.000	  0.000%	 99.004%	     0.000	        1	Delegate/Copy (NC, X8):5
+	    Fully Connected (NC, QS8, QC8W) GEMM	    0.001	    0.000	  0.004%	 99.007%	     0.000	        1	Delegate/Fully Connected (NC, QS8, QC8W) GEMM:6
+	                                 SOFTMAX	    0.003	    0.000	  0.584%	 99.591%	     0.000	        1	[StatefulPartitionedCall:01]:8
+	                                QUANTIZE	    0.001	    0.000	  0.409%	100.000%	     0.000	        1	[StatefulPartitionedCall:0]:9
+
+============================== Top by Computation Time ==============================
+	                             [node type]	  [first]	 [avg ms]	     [%]	  [cdf%]	  [mem KB]	[times called]	[Name]
+	           Convolution (NHWC, QC8) IGEMM	    0.020	    0.018	 44.727%	 44.727%	     0.000	        1	Delegate/Convolution (NHWC, QC8) IGEMM:2
+	           Convolution (NHWC, QC8) IGEMM	    0.016	    0.014	 35.477%	 80.204%	     0.000	        1	Delegate/Convolution (NHWC, QC8) IGEMM:0
+	           Convolution (NHWC, QC8) IGEMM	    0.008	    0.006	 15.236%	 95.440%	     0.000	        1	Delegate/Convolution (NHWC, QC8) IGEMM:4
+	                  Max Pooling (NHWC, S8)	    0.001	    0.001	  2.533%	 97.974%	     0.000	        1	Delegate/Max Pooling (NHWC, S8):1
+	                                QUANTIZE	    0.002	    0.000	  1.026%	 99.000%	     0.000	        1	[tfl.quantize]:0
+	                                 SOFTMAX	    0.003	    0.000	  0.584%	 99.584%	     0.000	        1	[StatefulPartitionedCall:01]:8
+	                                QUANTIZE	    0.001	    0.000	  0.409%	 99.993%	     0.000	        1	[StatefulPartitionedCall:0]:9
+	    Fully Connected (NC, QS8, QC8W) GEMM	    0.001	    0.000	  0.004%	 99.996%	     0.000	        1	Delegate/Fully Connected (NC, QS8, QC8W) GEMM:6
+	                  Max Pooling (NHWC, S8)	    0.000	    0.000	  0.004%	100.000%	     0.000	        1	Delegate/Max Pooling (NHWC, S8):3
+	                           Copy (NC, X8)	    0.000	    0.000	  0.000%	100.000%	     0.000	        1	Delegate/Copy (NC, X8):5
+
+Number of nodes executed: 10
+============================== Summary by node type ==============================
+	                             [Node type]	  [count]	  [avg ms]	    [avg %]	    [cdf %]	  [mem KB]	[times called]
+	           Convolution (NHWC, QC8) IGEMM	        3	     0.037	    97.368%	    97.368%	     0.000	        3
+	                  Max Pooling (NHWC, S8)	        2	     0.001	     2.632%	   100.000%	     0.000	        2
+	                                 SOFTMAX	        1	     0.000	     0.000%	   100.000%	     0.000	        1
+	                                QUANTIZE	        2	     0.000	     0.000%	   100.000%	     0.000	        2
+	    Fully Connected (NC, QS8, QC8W) GEMM	        1	     0.000	     0.000%	   100.000%	     0.000	        1
+	                           Copy (NC, X8)	        1	     0.000	     0.000%	   100.000%	     0.000	        1
+
+Timings (microseconds): count=20748 first=52 curr=39 min=37 max=75 avg=39.6611 std=2
+Memory (bytes): count=0
+10 nodes observed
diff --git a/_data/model-performance/1--sepconv2d-22899params/f16.tflite b/_data/model-performance/1--sepconv2d-22899params/f16.tflite