|
36 | 36 | from models.experimental import attempt_load
|
37 | 37 | from models.yolo import Model
|
38 | 38 | from utils.autoanchor import check_anchors
|
| 39 | +from utils.autobatch import check_train_batch_size |
39 | 40 | from utils.datasets import create_dataloader
|
40 | 41 | from utils.general import labels_to_class_weights, increment_path, labels_to_image_weights, init_seeds, \
|
41 | 42 | strip_optimizer, get_latest_run, check_dataset, check_git_status, check_img_size, check_requirements, \
|
@@ -131,6 +132,14 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary
|
131 | 132 | print(f'freezing {k}')
|
132 | 133 | v.requires_grad = False
|
133 | 134 |
|
| 135 | + # Image size |
| 136 | + gs = max(int(model.stride.max()), 32) # grid size (max stride) |
| 137 | + imgsz = check_img_size(opt.imgsz, gs, floor=gs * 2) # verify imgsz is gs-multiple |
| 138 | + |
| 139 | + # Batch size |
| 140 | + if RANK == -1 and batch_size == -1: # single-GPU only, estimate best batch size |
| 141 | + batch_size = check_train_batch_size(model, imgsz) |
| 142 | + |
134 | 143 | # Optimizer
|
135 | 144 | nbs = 64 # nominal batch size
|
136 | 145 | accumulate = max(round(nbs / batch_size), 1) # accumulate loss before optimizing
|
@@ -190,11 +199,6 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary
|
190 | 199 |
|
191 | 200 | del ckpt, csd
|
192 | 201 |
|
193 |
| - # Image sizes |
194 |
| - gs = max(int(model.stride.max()), 32) # grid size (max stride) |
195 |
| - nl = model.model[-1].nl # number of detection layers (used for scaling hyp['obj']) |
196 |
| - imgsz = check_img_size(opt.imgsz, gs, floor=gs * 2) # verify imgsz is gs-multiple |
197 |
| - |
198 | 202 | # DP mode
|
199 | 203 | if cuda and RANK == -1 and torch.cuda.device_count() > 1:
|
200 | 204 | logging.warning('DP not recommended, instead use torch.distributed.run for best DDP Multi-GPU results.\n'
|
@@ -242,6 +246,7 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary
|
242 | 246 | model = DDP(model, device_ids=[LOCAL_RANK], output_device=LOCAL_RANK)
|
243 | 247 |
|
244 | 248 | # Model parameters
|
| 249 | + nl = model.model[-1].nl # number of detection layers (to scale hyps) |
245 | 250 | hyp['box'] *= 3. / nl # scale to layers
|
246 | 251 | hyp['cls'] *= nc / 80. * 3. / nl # scale to classes and layers
|
247 | 252 | hyp['obj'] *= (imgsz / 640) ** 2 * 3. / nl # scale to image size and layers
|
@@ -440,7 +445,7 @@ def parse_opt(known=False):
|
440 | 445 | parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path')
|
441 | 446 | parser.add_argument('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch.yaml', help='hyperparameters path')
|
442 | 447 | parser.add_argument('--epochs', type=int, default=300)
|
443 |
| - parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs') |
| 448 | + parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs, -1 for autobatch') |
444 | 449 | parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='train, val image size (pixels)')
|
445 | 450 | parser.add_argument('--rect', action='store_true', help='rectangular training')
|
446 | 451 | parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training')
|
|
0 commit comments