@@ -34,11 +34,12 @@ def autobatch(model, imgsz=640, fraction=0.9, batch_size=16):
34
34
print (f'{ prefix } CUDA not detected, using default CPU batch-size { batch_size } ' )
35
35
return batch_size
36
36
37
+ d = str (device ).upper () # 'CUDA:0'
37
38
t = torch .cuda .get_device_properties (device ).total_memory / 1024 ** 3 # (GB)
38
39
r = torch .cuda .memory_reserved (device ) / 1024 ** 3 # (GB)
39
40
a = torch .cuda .memory_allocated (device ) / 1024 ** 3 # (GB)
40
41
f = t - (r + a ) # free inside reserved
41
- print (f'{ prefix } { t :.3g} G total, { r :.3g} G reserved, { a :.3g} G allocated, { f :.3g} G free' )
42
+ print (f'{ prefix } { d } { t :.3g} G total, { r :.3g} G reserved, { a :.3g} G allocated, { f :.3g} G free' )
42
43
43
44
batch_sizes = [1 , 2 , 4 , 8 , 16 ]
44
45
try :
@@ -50,9 +51,8 @@ def autobatch(model, imgsz=640, fraction=0.9, batch_size=16):
50
51
y = [x [2 ] for x in y if x ] # memory [2]
51
52
batch_sizes = batch_sizes [:len (y )]
52
53
p = np .polyfit (batch_sizes , y , deg = 1 ) # first degree polynomial fit
53
- f_intercept = int ((f * fraction - p [1 ]) / p [0 ]) # optimal batch size
54
- print (f'{ prefix } batch-size { f_intercept } estimated to utilize '
55
- f'{ str (device ).upper ()} { t * fraction :.3g} G/{ t :.3g} G ({ fraction * 100 :.0f} %)' )
56
- return f_intercept
54
+ b = int ((f * fraction - p [1 ]) / p [0 ]) # y intercept (optimal batch size)
55
+ print (f'{ prefix } batch-size { b } estimated to utilize { d } { t * fraction :.3g} G/{ t :.3g} G ({ fraction * 100 :.0f} %)' )
56
+ return b
57
57
58
58
# autobatch(torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False))
0 commit comments