@@ -18,6 +18,7 @@ limitations under the License.
18
18
#include < sys/types.h>
19
19
20
20
#include < cstring>
21
+ #include < initializer_list>
21
22
#include < memory>
22
23
#include < random>
23
24
#include < type_traits>
@@ -56,19 +57,37 @@ limitations under the License.
56
57
57
58
#endif // defined(GENERIC_BENCHMARK_USING_BUILTIN_MODEL)
58
59
60
// Build-time sanity check: GENERIC_BENCHMARK_ALT_MEM_ATTR is only meaningful
// together with GENERIC_BENCHMARK_ALT_MEM_SIZE, so fail the build if the
// attribute was supplied without the size.
+ #if defined(GENERIC_BENCHMARK_ALT_MEM_ATTR) && \
61
+ !defined(GENERIC_BENCHMARK_ALT_MEM_SIZE)
62
+ #error "GENERIC_BENCHMARK_ALT_MEM_SIZE missing from CXXFLAGS"
63
+ #endif // defined(GENERIC_BENCHMARK_ALT_MEM_ATTR) &&
64
+ // !defined(GENERIC_BENCHMARK_ALT_MEM_SIZE)
65
+
66
// Build-time sanity check (mirror of the one above): fail the build if
// GENERIC_BENCHMARK_ALT_MEM_SIZE was supplied without
// GENERIC_BENCHMARK_ALT_MEM_ATTR.
+ #if defined(GENERIC_BENCHMARK_ALT_MEM_SIZE) && \
67
+ !defined(GENERIC_BENCHMARK_ALT_MEM_ATTR)
68
+ #error "GENERIC_BENCHMARK_ALT_MEM_ATTR missing from CXXFLAGS"
69
+ #endif // defined(GENERIC_BENCHMARK_ALT_MEM_SIZE) &&
70
+ // !defined(GENERIC_BENCHMARK_ALT_MEM_ATTR)
71
+
72
// USE_ALT_DECOMPRESSION_MEM is the single switch the rest of this file tests.
// It is defined only when both alternate-memory macros are present AND
// compression support (USE_TFLM_COMPRESSION) is enabled.
+ #if defined(GENERIC_BENCHMARK_ALT_MEM_SIZE) && \
73
+ defined (GENERIC_BENCHMARK_ALT_MEM_ATTR) && defined(USE_TFLM_COMPRESSION)
74
+ #define USE_ALT_DECOMPRESSION_MEM
75
+ #endif // defined(GENERIC_BENCHMARK_ALT_MEM_SIZE) &&
76
+ // defined(GENERIC_BENCHMARK_ALT_MEM_ATTR) &&
77
+ // defined(USE_TFLM_COMPRESSION)
78
+
59
79
/*
60
- * Generic model benchmark. Evaluates runtime performance of a provided model
61
- * with random inputs.
80
+ * Generic model benchmark. Evaluates runtime performance of a provided
81
+ * model with random inputs.
62
82
*/
63
83
64
84
namespace tflite {
65
-
66
85
namespace {
67
86
68
87
using Profiler = ::tflite::MicroProfiler;
69
88
70
- // Seed used for the random input. Input data shouldn't affect invocation timing
71
- // so randomness isn't really needed.
89
+ // Seed used for the random input. Input data shouldn't affect invocation
90
+ // timing so randomness isn't really needed.
72
91
constexpr uint32_t kRandomSeed = 0xFB ;
73
92
74
93
#if !defined(GENERIC_BENCHMARK_USING_BUILTIN_MODEL)
@@ -80,6 +99,11 @@ constexpr size_t kTensorArenaSize = GENERIC_BENCHMARK_TENSOR_ARENA_SIZE;
80
99
constexpr size_t kTensorArenaSize = 5e6 - MODEL_SIZE;
81
100
#endif // !defined(GENERIC_BENCHMARK_USING_BUILTIN_MODEL)
82
101
102
// Backing storage for the alternate decompression memory region. Only built
// when USE_ALT_DECOMPRESSION_MEM is defined. The buffer is 16-byte aligned;
// its size comes from GENERIC_BENCHMARK_ALT_MEM_SIZE and the declaration is
// decorated with whatever GENERIC_BENCHMARK_ALT_MEM_ATTR expands to.
+ #if defined(USE_ALT_DECOMPRESSION_MEM)
103
+ constexpr size_t kAltMemorySize = GENERIC_BENCHMARK_ALT_MEM_SIZE;
104
+ alignas (16 ) GENERIC_BENCHMARK_ALT_MEM_ATTR uint8_t g_alt_memory[kAltMemorySize];
105
+ #endif // defined(USE_ALT_DECOMPRESSION_MEM)
106
+
83
107
constexpr int kNumResourceVariable = 100 ;
84
108
85
109
void SetRandomInput (const uint32_t random_seed,
@@ -130,39 +154,146 @@ bool ReadFile(const char* file_name, void* buffer, size_t buffer_size) {
130
154
}
131
155
#endif // !defined(GENERIC_BENCHMARK_USING_BUILTIN_MODEL)
132
156
157
// Number of entries in the CRC32 lookup table: one per possible byte value.
constexpr uint32_t kCrctabLen = 256;

// Byte-indexed CRC32 lookup table. Populated on demand by GenCRC32Table().
uint32_t crctab[kCrctabLen];

// Set once crctab has been filled in, so the table is only built one time no
// matter how often GenCRC32Table() is called.
bool crctab_ready = false;

// Fills crctab with the table for the reflected polynomial 0xEDB88320.
//
// Idempotent: the first call generates the table, later calls return
// immediately. Callers that invoke this before every checksum (as the
// Show*CRC32 helpers do) therefore no longer pay for a rebuild each time.
void GenCRC32Table() {
  if (crctab_ready) {
    return;
  }

  constexpr uint32_t kPolyN = 0xEDB88320;
  // A 32-bit index avoids the implicit size_t -> uint32_t narrowing when the
  // index seeds the table entry.
  for (uint32_t index = 0; index < kCrctabLen; index++) {
    uint32_t entry = index;
    // Process the byte one bit at a time, LSB first.
    for (int bit = 0; bit < 8; bit++) {
      entry = (entry & 1) ? ((entry >> 1) ^ kPolyN) : (entry >> 1);
    }
    crctab[index] = entry;
  }
  crctab_ready = true;
}

// Computes the CRC32 of `data_length` bytes starting at `data`.
//
// The running value starts as all ones and the final result is bit-inverted.
// The lookup table is generated here if it has not been built yet, so this
// function is safe to call without a prior GenCRC32Table() call. `data` is
// not dereferenced when `data_length` is 0; the result is then 0.
uint32_t ComputeCRC32(const uint8_t* data, const size_t data_length) {
  GenCRC32Table();

  uint32_t crc32 = ~0U;

  for (size_t i = 0; i < data_length; i++) {
    // crctab is an array of 256 32-bit constants; the mask keeps the index
    // within the table.
    const uint32_t index = (crc32 ^ data[i]) & (kCrctabLen - 1);
    crc32 = (crc32 >> 8) ^ crctab[index];
  }

  // invert all bits of result
  return crc32 ^ ~0U;
}
187
+
188
// Logs one CRC32 line per output tensor of `interpreter`.
// GenCRC32Table() is called first, then for every output index the tensor's
// data buffer is viewed as uint8_t, checksummed over output->bytes bytes with
// ComputeCRC32(), and the value is printed in hex through MicroPrintf.
+ void ShowOutputCRC32 (tflite::MicroInterpreter* interpreter) {
189
+ GenCRC32Table ();
190
+ for (size_t i = 0 ; i < interpreter->outputs_size (); ++i) {
191
+ TfLiteTensor* output = interpreter->output_tensor (i);
192
+ uint8_t * output_values = tflite::GetTensorData<uint8_t >(output);
193
+ uint32_t crc32_value = ComputeCRC32 (output_values, output->bytes );
194
+ MicroPrintf (" Output CRC32: 0x%X" , crc32_value);
195
+ }
196
+ }
197
+
198
// Logs one CRC32 line per input tensor of `interpreter`.
// GenCRC32Table() is called first, then for every input index the tensor's
// data buffer is viewed as uint8_t, checksummed over input->bytes bytes with
// ComputeCRC32(), and the value is printed in hex through MicroPrintf.
+ void ShowInputCRC32 (tflite::MicroInterpreter* interpreter) {
199
+ GenCRC32Table ();
200
+ for (size_t i = 0 ; i < interpreter->inputs_size (); ++i) {
201
+ TfLiteTensor* input = interpreter->input_tensor (i);
202
+ uint8_t * input_values = tflite::GetTensorData<uint8_t >(input);
203
+ uint32_t crc32_value = ComputeCRC32 (input_values, input->bytes );
204
+ MicroPrintf (" Input CRC32: 0x%X" , crc32_value);
205
+ }
206
+ }
207
+
133
208
int Benchmark (const uint8_t * model_data, tflite::PrettyPrintType print_type) {
134
- Profiler profiler;
209
+ static Profiler profiler;
210
+ static Profiler profiler2;
211
+ TfLiteStatus status;
212
+
213
+ // use this to keep the application size stable regardless of whether
214
+ // compression is being used
215
+ #ifdef USE_TFLM_COMPRESSION
216
+ constexpr bool using_compression = true ;
217
+ #else // USE_TFLM_COMPRESSION
218
+ constexpr bool using_compression = false ;
219
+ #endif // USE_TFLM_COMPRESSION
220
+
135
221
alignas (16 ) static uint8_t tensor_arena[kTensorArenaSize ];
136
222
137
- uint32_t event_handle = profiler.BeginEvent (" TfliteGetModel" );
223
+ #ifdef USE_ALT_DECOMPRESSION_MEM
224
+ std::initializer_list<tflite::MicroContext::AlternateMemoryRegion>
225
+ alt_memory_region = {{g_alt_memory, kAltMemorySize }};
226
+ #endif // USE_ALT_DECOMPRESSION_MEM
227
+
228
+ uint32_t event_handle = profiler.BeginEvent (" tflite::GetModel" );
138
229
const tflite::Model* model = tflite::GetModel (model_data);
139
230
profiler.EndEvent (event_handle);
140
231
232
+ event_handle = profiler.BeginEvent (" tflite::CreateOpResolver" );
141
233
TflmOpResolver op_resolver;
142
- TF_LITE_ENSURE_STATUS (CreateOpResolver (op_resolver));
234
+ status = CreateOpResolver (op_resolver);
235
+ if (status != kTfLiteOk ) {
236
+ MicroPrintf (" tflite::CreateOpResolver failed" );
237
+ return -1 ;
238
+ }
239
+ profiler.EndEvent (event_handle);
143
240
241
+ event_handle = profiler.BeginEvent (" tflite::RecordingMicroAllocator::Create" );
144
242
tflite::RecordingMicroAllocator* allocator (
145
243
tflite::RecordingMicroAllocator::Create (tensor_arena, kTensorArenaSize ));
244
+ profiler.EndEvent (event_handle);
245
+ event_handle = profiler.BeginEvent (" tflite::MicroInterpreter instantiation" );
146
246
tflite::RecordingMicroInterpreter interpreter (
147
247
model, op_resolver, allocator,
148
248
tflite::MicroResourceVariables::Create (allocator, kNumResourceVariable ),
149
249
&profiler);
150
- TF_LITE_ENSURE_STATUS (interpreter.AllocateTensors ());
250
+ profiler.EndEvent (event_handle);
251
+
252
+ #ifdef USE_ALT_DECOMPRESSION_MEM
253
+ event_handle =
254
+ profiler.BeginEvent (" tflite::MicroInterpreter::SetDecompressionMemory" );
255
+ status = interpreter.SetDecompressionMemory (alt_memory_region);
256
+ if (status != kTfLiteOk ) {
257
+ MicroPrintf (" tflite::MicroInterpreter::SetDecompressionMemory failed" );
258
+ return -1 ;
259
+ }
260
+ profiler.EndEvent (event_handle);
261
+ #endif // USE_ALT_DECOMPRESSION_MEM
262
+
263
+ event_handle =
264
+ profiler.BeginEvent (" tflite::MicroInterpreter::AllocateTensors" );
265
+ status = interpreter.AllocateTensors ();
266
+ if (status != kTfLiteOk ) {
267
+ MicroPrintf (" tflite::MicroInterpreter::AllocateTensors failed" );
268
+ return -1 ;
269
+ }
270
+ profiler.EndEvent (event_handle);
151
271
152
- profiler.Log ();
272
+ profiler.LogTicksPerTagCsv ();
153
273
profiler.ClearEvents ();
154
274
275
+ if (using_compression) {
276
+ status = interpreter.SetAlternateProfiler (&profiler2);
277
+ if (status != kTfLiteOk ) {
278
+ MicroPrintf (" tflite::MicroInterpreter::SetAlternateProfiler failed" );
279
+ return -1 ;
280
+ }
281
+ }
282
+
155
283
MicroPrintf (" " ); // null MicroPrintf serves as a newline.
156
284
157
- // For streaming models, the interpreter will return kTfLiteAbort if the model
158
- // does not yet have enough data to make an inference. As such, we need to
159
- // invoke the interpreter multiple times until we either receive an error or
160
- // kTfLiteOk. This loop also works for non-streaming models, as they'll just
161
- // return kTfLiteOk after the first invocation.
285
+ // For streaming models, the interpreter will return kTfLiteAbort if the
286
+ // model does not yet have enough data to make an inference. As such, we
287
+ // need to invoke the interpreter multiple times until we either receive an
288
+ // error or kTfLiteOk. This loop also works for non-streaming models, as
289
+ // they'll just return kTfLiteOk after the first invocation.
162
290
uint32_t seed = kRandomSeed ;
163
291
while (true ) {
164
292
SetRandomInput (seed++, interpreter);
165
- TfLiteStatus status = interpreter.Invoke ();
293
+ ShowInputCRC32 (&interpreter);
294
+ MicroPrintf (" " ); // null MicroPrintf serves as a newline.
295
+
296
+ status = interpreter.Invoke ();
166
297
if ((status != kTfLiteOk ) && (static_cast <int >(status) != kTfLiteAbort )) {
167
298
MicroPrintf (" Model interpreter invocation failed: %d\n " , status);
168
299
return -1 ;
@@ -174,6 +305,17 @@ int Benchmark(const uint8_t* model_data, tflite::PrettyPrintType print_type) {
174
305
MicroPrintf (" " ); // null MicroPrintf serves as a newline.
175
306
profiler.ClearEvents ();
176
307
308
+ if (using_compression) {
309
+ profiler2.Log ();
310
+ MicroPrintf (" " ); // null MicroPrintf serves as a newline.
311
+ profiler2.LogTicksPerTagCsv ();
312
+ MicroPrintf (" " ); // null MicroPrintf serves as a newline.
313
+ profiler2.ClearEvents ();
314
+ }
315
+
316
+ ShowOutputCRC32 (&interpreter);
317
+ MicroPrintf (" " ); // null MicroPrintf serves as a newline.
318
+
177
319
if (status == kTfLiteOk ) {
178
320
break ;
179
321
}
0 commit comments