#
# Copyright 1993-2018 NVIDIA Corporation. All rights reserved.
#
# NOTICE TO LICENSEE:
#
# This source code and/or documentation ("Licensed Deliverables") are
# subject to NVIDIA intellectual property rights under U.S. and
# international Copyright laws.
#
# These Licensed Deliverables contained herein is PROPRIETARY and
# CONFIDENTIAL to NVIDIA and is being provided under the terms and
# conditions of a form of NVIDIA software license agreement by and
# between NVIDIA and Licensee ("License Agreement") or electronically
# accepted by Licensee. Notwithstanding any terms or conditions to
# the contrary in the License Agreement, reproduction or disclosure
# of the Licensed Deliverables to any third party without the express
# written consent of NVIDIA is prohibited.
#
# NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
# LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
# SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
# PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
# NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
# DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
# NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
# NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
# LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
# SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
# OF THESE LICENSED DELIVERABLES.
#
# U.S. Government End Users. These Licensed Deliverables are a
# "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
# 1995), consisting of "commercial computer software" and "commercial
# computer software documentation" as such terms are used in 48
# C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
# only as a commercial end item. Consistent with 48 C.F.R.12.212 and
# 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
# U.S. Government End Users acquire the Licensed Deliverables with
# only those rights set forth herein.
#
# Any use of the Licensed Deliverables in individual and commercial
# software must include, in the user documentation and internal
# comments to the code, the above Disclaimer and U.S. Government End
# Users Notice.
#
from __future__ import division
import numpy as np
import tensorrt as trt
from tensorrt.parsers import uffparser, caffeparser
class Engine(object):
    '''A TensorRT engine with self-contained logger, runtime, context and memory
Members:
- **logger** ``tensorrt.infer.Logger``: Engine Logger
    - **log_sev** ``tensorrt.infer.LogSeverity``: Verbosity of the logger
- **max_batch_size** ``int``: Maximum supported batch size
- **max_workspace_size** ``int``: Maximum workspace size
- **data_type** ``tensorrt.infer.DataType``: Operating data type of the engine
- **src_framework** ``str``: Parser used to create engine
- **runtime** ``tensorrt.infer.Runtime``: Engine runtime
- **engine** ``tensorrt.infer.CudaEngine``: TensorRT Engine
- **context** ``tensorrt.infer.ExecutionContext``: Engine execution context
- **profiler** ``tensorrt.infer.Profiler``: Engine profiler
- **input_dim** ``[tensorrt.infer.DimsCHW]``: Input layer dimensions
- **output_dim** ``[tensorrt.infer.DimsCHW]``: Output layer dimensions
- **input_names** ``[str]``: Input layer names
- **output_names** ``[str]``: Output layer names
    - **d_input** ``[pycuda.DeviceAllocation]``: GPU buffer allocations for input layers
    - **d_output** ``[pycuda.DeviceAllocation]``: GPU buffer allocations for output layers
- **preprocessors** ``{str:function}``: Dictionary of input layer names and preprocessing functions (or None)
- **postprocessors** ``{str:function}``: Dictionary of output layer names and postprocessing functions (or None)
- **bindings** ``[int]``: Buffer Pointers
Methods:
- __init__(self, **kwargs)
- __del__(self)
- _create_engine(self, **kwargs)
- infer(self, *input_data)
    - save(self, path)
- supported_data_format(self, *input_data)
- uniform_data_format(self, *input_data)
- convert_LCHW_to_LNCHW(self, input_data)
- transform_to_LNCHW(self, *input_data)
    - verify_data_type(self, input_data)
- format_data(self, output_data)
- apply_postprocessing(self, layer_data, layer_postprocessor)
- log_info(self, msg)
- log_error(self, msg)
- log_warn(self, msg)
'''
def __init__(self, **kwargs):
        '''Create a self-contained inference engine
Args:
- **device** ``int`` *optional*: Device number or PCI bus ID of GPU to use *Default: ``0``*
- **logger** ``<class> tensorrt.infer.Logger`` *optional*: Engine Logger *Default: ``tensorrt.infer.ColorLogger``*
        - **logger_severity** ``tensorrt.infer.LogSeverity`` *optional*: Logger verbosity *Default: ``tensorrt.infer.LogSeverity.INFO``*
- **max_batch_size** ``int`` *optional*: Max batch size *Default: ``1``*
        - **max_workspace_size** ``int`` *optional*: Max workspace size *Default: ``1 << 20``*
- **data_type** ``tensorrt.infer.DataType`` *optional*: Operating Data Type *Default: ``tensorrt.infer.DataType.FLOAT``*
- **framework** ``str``: Source framework ("tf", "uff", "caffe")
- "tf" and "uff" requires one of the following arguments:
- **path** ``str``: Path to frozen file
- **stream** ``str/bytes``: Serialized graph
- "caffe" requires both a path to deploy file and path to model file
- **deployfile** ``str``: Path to deploy file
- **modelfile** ``str``: Path to model file
and
- **input_nodes** ``{str : (C, H, W)/[C, H, W]}``: Names and sizes of input nodes
- **output_nodes** ``[str]``: Names of output nodes
- **PLAN** ``str``: Path to a PLAN file
- **engine_stream** ``str/[bytes]``: serialized TensorRT engine stream
- **calibrator** ``tensorrt.infer.Calibrator`` *optional*: Int8 Calibrator
- **profiler** ``<class> tensorrt.infer.Profiler`` *optional*: Engine profiler
- **preprocessors** ``{str:function}``: Dictionary of input layer names and preprocessing functions (or None)
- **postprocessors** ``{str:function}``: Dictionary of output layer names and postprocessing functions (or None)
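        Example (a minimal sketch; the file paths and node name below are
        hypothetical and must be replaced with a real model)::

            engine = Engine(framework="caffe",
                            deployfile="model/deploy.prototxt",
                            modelfile="model/weights.caffemodel",
                            output_nodes=["prob"],
                            max_batch_size=4)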
'''
# We only need to import these if the Engine is actually created.
try:
from importlib import import_module
self.cuda = import_module("pycuda.driver")
self.cuda.init()
except ImportError as err:
raise ImportError("""ERROR: Failed to import module({})
Please make sure you have pycuda and the example dependencies installed.
sudo apt-get install python(3)-pycuda
pip install tensorrt[examples]""".format(err))
        device = kwargs.get("device", 0)
self.device = self.cuda.Device(device)
self.cuda_context = self.device.make_context()
# Push the context - and make sure to pop it before returning!
self.cuda_context.push()
logger = trt.infer.ColorLogger
log_sev = trt.infer.LogSeverity.INFO
self.max_batch_size = 1
self.max_workspace_size = 1 << 20
self.data_type = trt.infer.DataType.FLOAT
        # Do all CUDA allocations with this data type. Allocating with HALF produces incorrect results.
self.alloc_data_type = trt.infer.DataType.FLOAT
self.src_framework = None
self.context = None
self.engine = None
self.runtime = None
for k,v in kwargs.items():
if k == "logger":
logger = v
elif k == "logger_severity":
log_sev = v
elif k == "max_batch_size":
self.max_batch_size = v
elif k == "max_workspace_size":
self.max_workspace_size = v
elif k == "data_type":
self.data_type = v
self.logger = logger(log_sev)
self.runtime = trt.infer.create_infer_runtime(self.logger)
frwk = kwargs.get("framework", None)
if frwk:
self.log_info("Detecting Framework")
modelstream = None
if frwk == "tf" or frwk == "tensorflow":
self.src_framework = "uff"
path = kwargs.get("path", None)
stream = kwargs.get("stream", None)
output_nodes = kwargs.get("output_nodes", None)
input_nodes = kwargs.get("input_nodes", None)
if not (bool(path) ^ bool(stream)) or not output_nodes or not input_nodes:
self.log_error("Need either modelstream or filepath, output nodes and input nodes to create engine from tensorflow graph")
try:
import uff
except ImportError as err:
raise ImportError("""ERROR: Failed to import module ({})
Please make sure you have the UFF toolkit installed.
For installation instructions, see:
            https://docs.nvidia.com/deeplearning/sdk/tensorrt-api/#python and click on the 'TensorRT Python API' link""".format(err))
if stream:
modelstream = uff.from_tensorflow(stream, output_nodes)
else:
modelstream = uff.from_tensorflow_frozen_model(path, output_nodes)
elif frwk == "uff":
self.src_framework = "uff"
path = kwargs.get("path", None)
stream = kwargs.get("stream", None)
output_nodes = kwargs.get("output_nodes", None)
input_nodes = kwargs.get("input_nodes", None)
if not (bool(path) ^ bool(stream)) or not output_nodes or not input_nodes:
self.log_error("Need either modelstream or filepath, input nodes and output nodes to create engine from uff graph")
if stream:
modelstream = stream
elif frwk == "caffe" or frwk == "c1":
self.src_framework = "caffe"
modelfile = kwargs.get("modelfile", None)
deployfile = kwargs.get("deployfile", None)
output_nodes = kwargs.get("output_nodes", None)
if not (modelfile and deployfile) or not output_nodes:
self.log_error("Need a model file and deployfile, and output nodes to create engine from caffe")
else:
self.log_error("Unsupported framework")
self._create_engine(modelstream, **kwargs)
elif kwargs.get("PLAN", None):
self.engine = trt.utils.load_engine(self.logger, kwargs.get("PLAN", None), kwargs.get("plugins", None))
self.max_batch_size = self.engine.get_max_batch_size()
elif kwargs.get("engine_stream", None):
self.engine = self.runtime.deserialize_cuda_engine(kwargs.get("engine_stream", None), kwargs.get("plugins", None))
self.max_batch_size = self.engine.get_max_batch_size()
else:
self.log_error("No supported engine source provided")
self.log_info("Verifying engine construction was successful")
assert(self.engine)
self.context = self.engine.create_execution_context()
if kwargs.get("profiler", None):
self.profiler = kwargs.get("profiler", None)
self.context.set_profiler(self.profiler)
self.log_info("Allocating GPU buffers")
nb_bindings = self.engine.get_nb_bindings()
input_index = []
output_index = []
for b in range(nb_bindings):
if self.engine.binding_is_input(b):
input_index.append(b)
else:
output_index.append(b)
self.input_dim = [self.engine.get_binding_dimensions(i).to_DimsCHW() for i in input_index]
self.output_dim = [self.engine.get_binding_dimensions(o).to_DimsCHW() for o in output_index]
insize = [self.max_batch_size * i.vol() * self.alloc_data_type.input_type().itemsize for i in self.input_dim]
outsize = [self.max_batch_size * o.vol() * self.alloc_data_type.input_type().itemsize for o in self.output_dim]
self.d_input = [self.cuda.mem_alloc(i) for i in insize]
self.d_output = [self.cuda.mem_alloc(o) for o in outsize]
self.bindings = [int(i) for i in self.d_input]
self.bindings = self.bindings + [int(i) for i in self.d_output]
self.input_names = [self.engine.get_binding_name(i) for i in input_index]
self.output_names = [self.engine.get_binding_name(o) for o in output_index]
self.has_preprocessors, self.has_postprocessors = False, False
if kwargs.get("preprocessors", None):
if len(kwargs["preprocessors"]) < len(self.input_dim):
self.log_error('''In order to use the internal preprocessor, a function or a None must be provided for each input node, saw {}, expected {}'''.format(len(kwargs["preprocessors"]),len(self.input_dim)))
elif len(kwargs["preprocessors"]) > len(self.input_dim):
self.log_error('''Too many functions in preprocessing function table, saw {}, expected {}'''.format(len(kwargs["preprocessors"]),len(self.input_dim)))
else:
self.log_info("Registering preprocessor function table")
self.preprocessors = kwargs["preprocessors"]
self.has_preprocessors = True
if kwargs.get("postprocessors", None):
if len(kwargs["postprocessors"]) < len(self.input_dim):
self.log_error('''In order to use the internal postprocessor, a function or a None must be provided for each output node, saw {}, expected {}'''.format(len(kwargs["postprocessors"]),len(self.input_dim)))
elif len(kwargs["postprocessors"]) > len(self.input_dim):
self.log_error('''Too many functions in postprocessor function table, saw {}, expected {}'''.format(len(kwargs["postprocessors"]),len(self.input_dim)))
else:
self.log_info("Registering postprocessor function table")
self.postprocessors = kwargs["postprocessors"]
self.has_postprocessors = True
        # Remove this from the context stack so the Lite Engine is self-contained.
self.cuda_context.pop()
self.runtime.destroy()
self.runtime = None
def __del__(self):
if self.cuda_context:
# Must remove this context on destruction.
self.cuda_context.pop()
if self.context:
self.context.destroy()
if self.engine:
self.engine.destroy()
if self.runtime:
self.runtime.destroy()
def _create_engine(self, modelstream, **kwargs):
'''
Helper to create engine when trying to build from models
'''
self.log_info("Parsing Model from {}".format(self.src_framework))
if self.src_framework == "uff":
parser = uffparser.create_uff_parser()
for k,v in kwargs["input_nodes"].items():
parser.register_input(k,v, 0)
for o in kwargs["output_nodes"]:
parser.register_output(o)
if modelstream:
self.engine = trt.utils.uff_to_trt_engine(self.logger,
modelstream,
parser,
self.max_batch_size,
self.max_workspace_size,
self.data_type,
None, #TODO: Figure out if plugins are supported in UFF
kwargs.get("calibrator", None))
else:
self.engine = trt.utils.uff_file_to_trt_engine(self.logger,
kwargs["path"],
parser,
self.max_batch_size,
self.max_workspace_size,
self.data_type,
None, #TODO: Figure out if plugins are supported in UFF
kwargs.get("calibrator", None))
parser.destroy()
elif self.src_framework == "caffe":
self.engine = trt.utils.caffe_to_trt_engine(self.logger,
kwargs["deployfile"],
kwargs["modelfile"],
self.max_batch_size,
self.max_workspace_size,
kwargs["output_nodes"],
self.data_type,
kwargs.get("plugins", None),
kwargs.get("calibrator", None))
def save(self, path):
'''
Save the TensorRT Engine to a PLAN file
Saves the TensorRT Engine to a PLAN file that can be used later.
        *Note:* This saves only the internal TensorRT engine in the class,
        not the Engine object and all provided settings.
Args:
- **path** ``str``: Desired path to save file
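        Example (a sketch; the path is hypothetical)::

            engine.save("engine.plan")
            # The PLAN file can later be used to rebuild the engine:
            engine2 = Engine(PLAN="engine.plan")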
'''
trt.utils.write_engine_to_file(path, self.engine.serialize())
def supported_data_format(self, *input_data):
'''
        Detects whether the provided data is in one of the supported formats
Args:
- **input_data** ``tuple`` Tuple of lists of data for input layers
Returns:
- ``None``
Side-Effects:
- Sets the input format detected (LLCHW, LNCHW, LCHW, ZNCHW, NCHW, CHW)
Raises:
            - ``ValueError``: Unsupported data format
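        Example (a sketch of the detection rules; shapes are hypothetical)::

            engine.supported_data_format(np.zeros((3, 224, 224)))     # -> "CHW"
            engine.supported_data_format(np.zeros((4, 3, 224, 224)))  # -> "NCHW"
            engine.supported_data_format([np.zeros((3, 224, 224))])   # -> "LCHW"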
'''
self.log_info("Detecting input data format")
if type(input_data[0]) is list: #Num Batches?, Batches?
if type(input_data[0][0]) is list: #Batches?
if type(input_data[0][0][0]) is np.ndarray and input_data[0][0][0].ndim == 3:
self.input_format = "LLCHW"
else:
self.log_error('''Unsupported data format, expect 5D numpy, List of NCHWs, List of CHWs, List of List of CHWs, Single NCHW or Single CHW''')
elif type(input_data[0][0]) is np.ndarray: #Batches?, Img?
if input_data[0][0].ndim == 4: #List of NCHW
self.input_format = "LNCHW"
elif input_data[0][0].ndim == 3: #List of CHW
self.input_format = "LCHW"
else:
self.log_error('''Unsupported data format, expect 5D numpy, List of NCHWs, List of CHWs, List of List of CHWs, Single NCHW or Single CHW''')
elif type(input_data[0]) is np.ndarray:
if input_data[0].ndim == 5:
self.input_format = "ZNCHW"
elif input_data[0].ndim == 4:
self.input_format = "NCHW"
elif input_data[0].ndim == 3:
self.input_format = "CHW"
else:
self.log_error('''Unsupported data format, expect 5D numpy, List of NCHWs, List of CHWs, List of List of CHWs, Single NCHW or Single CHW''')
else:
            self.log_error('''Unsupported data format, expect 5D numpy, List of NCHWs, List of CHWs, List of List of CHWs, Single NCHW or Single CHW''')
self.log_info("Dectected data format {}".format(self.input_format))
def uniform_data_format(self, *input_data):
'''
        Verifies that the data format is uniform across all input layers and
        that it is uniform across batches
Args:
- **input_data** ``tuple`` Tuple of lists of data for input layers
Returns:
- ``None``
Raises:
            - ``ValueError``: If the number of batches, batch size, or CHW dims differ, or if the batch size is too large
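        Example (a sketch, assuming an engine with two input layers and that
        ``supported_data_format`` has already detected NCHW)::

            # Different batch sizes across input layers raise a ValueError:
            engine.uniform_data_format(np.zeros((4, 3, 32, 32)),
                                       np.zeros((2, 3, 32, 32)))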
'''
self.log_info("Verifying data format is uniform accross all input layers")
for i in input_data:
#CHECK NUM BATCHES IS THE SAME AND BATCH SIZES MATCH
if self.input_format == "LLCHW":
#NUM BATCHES
if len(input_data[0]) != len(i):
                    self.log_error('''Input format detected as [[CHW]], len([[CHW]]) for input layers not uniform, i.e. number of batches not uniform, expected {}, saw {}'''.format(len(input_data[0]), len(i)))
#BATCH SIZES MATCH
for j in range(len(input_data[0])):
if len(input_data[0][j]) != len(i[j]):
                        self.log_error('''Input format detected as [[CHW]], len([CHW]) for input layers not uniform, i.e. batch size not uniform, expected {}, saw {}'''.format(len(input_data[0][j]), len(i[j])))
if len(i[j]) > self.max_batch_size:
self.log_error('''Input format detected as [[CHW]], len([CHW]) greater than engine max batch size, saw {}, max {}'''.format(len(i[j]), self.max_batch_size))
#CHECK SAME CHW DIMS
dims = i[0][0].shape
for j in i:
for k in j:
if k.shape != dims:
self.log_error('''Input format detected as [[CHW]], found CHW tensors to not be uniform, saw {}, expected {}'''.format(k.shape, dims))
#CHECK NUM BATCHES IS THE SAME AND BATCH SIZES MATCH
elif self.input_format == "LNCHW":
#NUM BATCHES
if len(input_data[0]) != len(i):
                    self.log_error('''Input format detected as [NCHW], len([NCHW]) for input layers not uniform, i.e. number of batches not uniform, expected {}, saw {}'''.format(len(input_data[0]), len(i)))
for j in range(len(input_data[0])):
if input_data[0][j].shape[0] != i[j].shape[0]:
self.log_error('''Input format detected as [NCHW], batch size not uniform, expected {}, saw {}'''.format(input_data[0][j].shape[0], i[j].shape[0]))
dims = i[0].shape
for j in i:
if j.shape != dims:
self.log_error('''Input format detected as [NCHW], found NCHW tensors to not be uniform in the list, saw {}, expected {}'''.format(j.shape, dims))
#CHECK BATCH SIZES MATCH
elif self.input_format == "LCHW":
if len(input_data[0]) != len(i):
                    self.log_error('''Input format detected as [CHW], len([CHW]) for input layers not uniform, i.e. batch sizes not uniform, expected {}, saw {}'''.format(len(input_data[0]), len(i)))
dims = i[0].shape
for j in i:
if j.shape != dims:
                        self.log_error('''Input format detected as [CHW], found CHW tensors to not be uniform, saw {}, expected {}'''.format(j.shape, dims))
#CHECK NUM BATCHES ARE THE SAME AND BATCH SIZES MATCH
elif self.input_format == "ZNCHW":
if input_data[0].shape[0] != i.shape[0]:
                    self.log_error('''Input format detected as ZNCHW, number of batches not uniform across input layers, expected {}, saw {}'''.format(input_data[0].shape[0], i.shape[0]))
                if input_data[0].shape[1] != i.shape[1]:
                    self.log_error('''Input format detected as ZNCHW, batch sizes not uniform across input layers, expected {}, saw {}'''.format(input_data[0].shape[1], i.shape[1]))
#CHECK BATCHES ARE THE SAME
elif self.input_format == "NCHW":
if input_data[0].shape[0] != i.shape[0]:
self.log_error('''Input format detected as NCHW,
for input layes batch sizes not uniform, expected {}, saw {}'''.format(len(input_data[0].shape[0]),i.shape[0]))
elif self.input_format == "CHW":
pass
else:
self.log_error("Engine input format set to something other than supported types, saw {}".format(self.input_format))
def convert_LCHW_to_LNCHW(self, input_data):
'''
Converts data from LCHW format to LNCHW
Helper for transform_to_LNCHW to convert LCHW format to LNCHW
Args:
            - **input_data** ``list``: List of LCHW data, one list of CHW arrays per input layer
        Returns:
            - ``list``: List of LNCHW data, one list of NCHW batches per input layer
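        Example (a sketch, assuming ``max_batch_size == 2``)::

            # Five CHW arrays for one input layer become batches of 2, 2 and 1:
            data = [[np.zeros((3, 32, 32)) for _ in range(5)]]
            batched = engine.convert_LCHW_to_LNCHW(data)
            # [b.shape[0] for b in batched[0]] == [2, 2, 1]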
'''
for i in range(len(input_data)):
data = []
if len(input_data[i]) > self.max_batch_size:
for b in range(len(input_data[i]) // self.max_batch_size):
data.append(np.array(input_data[i][(b * self.max_batch_size) : ((b+1) * self.max_batch_size)]))
if len(input_data[i]) % self.max_batch_size:
data.append(np.array(input_data[i][-(len(input_data[i]) % self.max_batch_size):]))
else:
data.append(np.array(input_data[i]))
input_data[i] = data
return input_data
def transform_to_LNCHW(self, *input_data):
'''
Converts supported data formats to LNCHW
Converts data from LLCHW, LCHW, ZNCHW, NCHW, CHW to LNCHW (List of batches)
        For LLCHW, ZNCHW, and LNCHW, the batch size (length of the internal lists) must not
        exceed the engine's max batch size (default: 1)
Args:
- **input_data** ``tuple`` Tuple of lists of data for input layers
Returns:
            - ``list``: Data formatted as a list of batches for inference
Raises:
            - ``ValueError``: Batch size too large
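        Example (a sketch, assuming ``max_batch_size == 2`` and that NCHW was
        detected by ``supported_data_format``)::

            # A batch of 5 is split internally into batches of 2, 2 and 1:
            batches = engine.transform_to_LNCHW(np.zeros((5, 3, 32, 32)))
            # [b.shape[0] for b in batches[0]] == [2, 2, 1]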
'''
if self.input_format == "LLCHW":
input_data_list = []
for i in range(len(input_data)):
data = []
for j in range(len(input_data[i])):
                    if len(input_data[i][j]) > self.max_batch_size:
                        self.log_error("Batch size is too large for engine, saw {}, max: {}".format(len(input_data[i][j]), self.max_batch_size))
data.append(np.array(input_data[i][j]))
input_data_list.append(data)
return input_data_list
elif self.input_format == "LNCHW":
for i in input_data:
for b in i:
                    if b.shape[0] > self.max_batch_size:
                        self.log_error("Batch size is too large for engine, saw {}, max: {}".format(b.shape[0], self.max_batch_size))
return input_data
elif self.input_format == "LCHW":
input_data_list = []
for i in input_data:
input_data_list.append(i)
return self.convert_LCHW_to_LNCHW(input_data_list)
elif self.input_format == "ZNCHW":
input_data_list = []
for i in input_data:
                if i.shape[1] > self.max_batch_size:
                    self.log_error("Batch size is too large for engine, saw {}, max: {}".format(i.shape[1], self.max_batch_size))
for i in range(len(input_data)):
data = []
for b in range(len(input_data[i])):
data.append(input_data[i][b])
input_data_list.append(data)
return input_data_list
elif self.input_format == "NCHW":
needs_reshaping = False
input_data_list = []
for i in input_data:
if i.shape[0] > self.max_batch_size:
self.log_warn('''Batch size is too large for engine, saw {}, max: {}, trying internal reshaping'''.format(i.shape[0], self.max_batch_size))
needs_reshaping = True
break
if needs_reshaping:
for i in range(len(input_data)):
data = input_data[i]
batches = []
for b in range(input_data[i].shape[0] // self.max_batch_size):
batches.append(data[(b * self.max_batch_size) : ((b + 1) * self.max_batch_size)])
if input_data[i].shape[0] % self.max_batch_size:
batches.append(data[-(input_data[i].shape[0] % self.max_batch_size):])
input_data_list.append(batches)
else:
for i in input_data:
input_data_list.append([i])
return input_data_list
elif self.input_format == "CHW":
input_data_list = []
for i in range(len(input_data)):
data = []
data.append(input_data[i])
input_data_list.append([np.array(data)])
return input_data_list
def verify_data_type(self, input_data):
'''
        Verifies that the data type is the expected type for the engine
        Verifies that the data type is the expected type for the engine and
        attempts to convert the data to the expected type if it is not.
        A warning is issued if a different data type is detected; if the
        data cannot be converted, this may be the cause of incorrect
        results from the engine.
Args:
- **input_data** ``list``: List of LNCHW data
Returns:
- ``list``: List of LNCHW data with the correct type
Side-Effects:
            - If the data type is changed, a warning will be printed by the engine logger;
              make sure the logger severity is set to at least WARNING to see it.
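        Example (a sketch, assuming the engine operates in FLOAT)::

            # float64 batches are converted to float32 with a warning:
            data = [[np.zeros((1, 3, 32, 32), dtype=np.float64)]]
            data = engine.verify_data_type(data)
            # data[0][0].dtype == np.float32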
'''
self.log_info("Verifying batches are the expected data type")
for l in input_data:
for b in range(len(l)):
if l[b].dtype != self.data_type.input_type():
self.log_warn("Input batch data type is not the expected data type for the engine, saw {}, expected {}. Attempting conversion".format(l[b].dtype, self.data_type.input_type()))
l[b] = l[b].astype(self.data_type.input_type())
return input_data
def infer(self, *input_data):
'''
Run inference on a set of data
Runs inference on a set of data provided by the user.
        Data must be provided in a supported data format:
- CHW: Single 3D numpy array in form Channels x Height x Width
- NCHW: Single 4D numpy array in form Batch Size x Channels x Height x Width *note: If the batch size is larger than the supported max batch size, the function will attempt to split up the batch into smaller supported batches*
        - ZNCHW: Single 5D numpy array in the form Number of Batches x Batch size x Channels x Height x Width *note: if the batch size is larger than the supported max batch size, the function will error out*
- LNCHW: List of 4D numpy arrays in form Batch Size x Channels x Height x Width *note: if the batch size is larger than the supported max batch size, the function will error out*
        - LLCHW: List of lists of 3D numpy arrays in form Channels x Height x Width *note: if the size of the inner lists is larger than the supported max batch size, or the sizes of the inner lists are not uniform, the function will error out*
- LCHW: List of 3D numpy arrays in form Channels x Height x Width *note: If the size of the list is larger than the supported max batch size, the function will attempt to split up the list into smaller supported batches*
        Provide a separate array for each input layer
        If a preprocessor function table is registered with the engine at creation, then before inference each input data object (3D numpy array) will be passed into the user specified preprocessor function for the relevant input layer, and inference will be run on the preprocessed data.
        If a postprocessor function table is registered with the engine at creation, then after inference each output data object (3D numpy array) will be passed into the user specified postprocessor function for the relevant output layer, and the function will return the postprocessed data.
Args:
- input_data,... ``list/numpy.ndarray``: List or numpy array containing data in a supported format, multiple lists/np.ndarrays should be passed in if there are multiple input layers, one per layer in order of bindings
Returns:
            - ``list/numpy.ndarray``: Results of inference arranged in the same format as the input data
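        Example (a minimal sketch; the shape is hypothetical and must match
        the engine's input dimensions)::

            result = engine.infer(np.random.rand(3, 224, 224).astype(np.float32))
            # result[0] holds the first output layer's result in CHW form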
'''
# Make this the active context - make sure to pop before returning!
self.cuda_context.push()
if len(input_data) != len(self.d_input):
if len(input_data) > len(self.d_input):
self.log_error("Too many input data streams, saw {}, expected {}".format(len(input_data),
len(self.d_input)))
else:
self.log_error("Too few input data streams, saw {}, expected {}".format(len(input_data),
len(self.d_input)))
#Expect 5D Tensor, lists of NCHWs, lists of CHWs, single NCHW or single CHW
self.supported_data_format(*input_data)
self.uniform_data_format(*input_data)
input_data = self.transform_to_LNCHW(*input_data)
input_data = self.verify_data_type(input_data)
num_execs = len(input_data[0])
self.log_info("Executing inference")
self.log_info("Number of Batches: {}".format(num_execs))
output_data = []
for o in self.output_dim:
layer_out = []
for b in input_data[0]:
layer_out.append(np.empty((b.shape[0], o.C(), o.H(), o.W()), dtype=self.alloc_data_type.input_type()))
output_data.append(layer_out)
#CAN YOU CREATE MORE EXECUTION STREAMS?
stream = self.cuda.Stream()
for n in range(num_execs):
#Transfer nth batch for each input layer
for i in range(len(input_data)):
#In pipeline batch element preprocessing
if self.has_preprocessors and self.preprocessors[self.input_names[i]]:
for chw in range(len(input_data[i][n])):
input_data[i][n][chw] = self.preprocessors[self.input_names[i]](input_data[i][n][chw])
#Transfer data to device
self.cuda.memcpy_htod_async(self.d_input[i], np.ascontiguousarray(input_data[i][n].astype(self.alloc_data_type.input_type())), stream)
stream.synchronize()
self.log_info("Execution batch size: {}".format(input_data[i][n].shape[0]))
self.context.enqueue(input_data[i][n].shape[0], self.bindings, stream.handle)
stream.synchronize()
for o in range(len(self.d_output)):
self.cuda.memcpy_dtoh_async(output_data[o][n], self.d_output[o], stream)
stream.synchronize()
#TODO: USE MULTIPLE STREAMS SO PEOPLE CAN GIVE DATA LARGER THAN BATCH SIZE (NEEDS EVENTS FROM PYCUDA)
output_data = self.format_data(output_data)
for o in range(len(output_data)):
if self.has_postprocessors and self.postprocessors[self.output_names[o]]:
self.log_info("Postprocessing output data for output layer " + self.output_names[o])
output_data[o] = self.apply_postprocessing(output_data[o], self.postprocessors[self.output_names[o]])
# Remove this from the context stack so infer is self contained.
stream = None
self.cuda_context.pop()
return output_data
def format_data(self, output_data):
'''
Format results to same shape as input data
        Formats the results of inference from the operating data format (LNCHW) back into the same data format the input data was given in.
Args:
- **output_data** ``[LNCHW data]``: List of inference results in LNCHW
Returns:
- ``list``: List of results for each layer in the same format as the input data
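        Example (a sketch, assuming the input was given as a single NCHW array)::

            # Per-batch results are concatenated back into one NCHW array
            # per output layer:
            formatted = engine.format_data(output_data)
            # formatted[0].shape[0] == total number of input images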
'''
self.log_info("Formating output data")
frmt = self.input_format
formatted_output_data = []
for o in range(len(output_data)):
if frmt == "LLCHW":
batches = []
for b in range(len(output_data[o])):
batch = []
for c in range(len(output_data[o][b])):
batch.append(output_data[o][b][c])
batches.append(batch)
formatted_output_data.append(batches)
elif frmt == "LCHW":
batch_data = []
for b in range(len(output_data[o])):
for c in range(len(output_data[o][b])):
batch_data.append(output_data[o][b][c])
formatted_output_data.append(batch_data)
elif frmt == "LNCHW":
return output_data
elif frmt == "ZNCHW":
formatted_output_data.append(np.array(output_data[o]))
elif frmt == "NCHW":
batch_data = []
for b in range(len(output_data[o])):
for c in range(len(output_data[o][b])):
batch_data.append(output_data[o][b][c])
formatted_output_data.append(np.array(batch_data))
elif frmt == "CHW":
formatted_output_data.append(output_data[o][0][0])
return formatted_output_data
def apply_postprocessing(self, layer_data, layer_postprocessor):
'''
Apply the user specified postprocessing function to results
        Takes the outputs for a layer and the layer's specified postprocessor
        function and processes each result (3D numpy array), storing the
        processed result in place of the original array.
Args:
            - **layer_data** ``list/numpy.ndarray``: Formatted results for a layer
            - **layer_postprocessor** ``function``: Layer's post processing function
        Returns:
            - ``list/numpy.ndarray``: Post processed results
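        Example (a sketch, assuming CHW input format and a hypothetical
        postprocessor that reduces a CHW result to a class index)::

            def argmax_postprocessor(chw):
                return np.argmax(chw)

            label = engine.apply_postprocessing(result, argmax_postprocessor)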
'''
frmt = self.input_format
if frmt == "LLCHW":
for i in range(len(layer_data)):
for j in range(len(layer_data[i])):
layer_data[i][j] = layer_postprocessor(layer_data[i][j])
return layer_data
elif frmt == "LCHW":
for i in range(len(layer_data)):
layer_data[i] = layer_postprocessor(layer_data[i])
return layer_data
elif frmt == "LNCHW":
for i in range(len(layer_data)):
new_data = []
for j in range(len(layer_data[i])):
new_data.append(layer_postprocessor(layer_data[i][j]))
layer_data[i] = np.array(new_data)
return layer_data
elif frmt == "ZNCHW":
new_data_container = []
for i in range(len(layer_data)):
new_data = []
for j in range(len(layer_data[i])):
new_data.append(layer_postprocessor(layer_data[i][j]))
new_data_container.append(np.array(new_data))
return np.array(new_data_container)
elif frmt == "NCHW":
new_data = []
for i in range(len(layer_data)):
new_data.append(layer_postprocessor(layer_data[i]))
return np.array(new_data)
elif frmt == "CHW":
return layer_postprocessor(layer_data)
return layer_data
def log_info(self, msg):
'''
Helper for printing engine status
Args:
- **msg** ``str``: What to print
Side-effects:
- Prints message to console in the INFO stream
'''
self.logger.log(trt.infer.LogSeverity.INFO, msg)
def log_warn(self, msg):
'''
Helper for printing engine warnings
Args:
- **msg** ``str``: What to print
Side-effects:
- Prints message to console in the WARNING stream
'''
self.logger.log(trt.infer.LogSeverity.WARNING, msg)
def log_error(self, msg):
'''
Helper for printing engine errors
Args:
- **msg** ``str``: What to print
Side-effects:
- Prints message to console in the ERROR stream
Raises:
- ValueError
'''
self.logger.log(trt.infer.LogSeverity.ERROR, msg)
raise ValueError(msg)
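

# A minimal end-to-end usage sketch (assumptions: the file paths and node name
# below are hypothetical and must be replaced with a real model; the engine is
# assumed to take a single float32 CHW input of the given shape).
if __name__ == "__main__":
    engine = Engine(framework="caffe",
                    deployfile="model/deploy.prototxt",    # hypothetical path
                    modelfile="model/weights.caffemodel",  # hypothetical path
                    output_nodes=["prob"],                 # hypothetical node name
                    max_batch_size=4)
    # Run inference on a single CHW image; results come back one array per
    # output layer, in the same format the input was given in.
    image = np.random.rand(3, 224, 224).astype(np.float32)
    results = engine.infer(image)
    print([r.shape for r in results])
    # Optionally serialize the engine to a PLAN file for faster reloads.
    engine.save("engine.plan")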