From 063ca566142d52600620d81708ceef354a8e9ee9 Mon Sep 17 00:00:00 2001
From: Vasilis Vryniotis
Date: Tue, 28 Sep 2021 12:21:40 +0100
Subject: [PATCH 01/17] Update EMA every X iters.

---
 references/classification/train.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/references/classification/train.py b/references/classification/train.py
index 48ab75bc2c1..fed34721b77 100644
--- a/references/classification/train.py
+++ b/references/classification/train.py
@@ -27,7 +27,7 @@ def train_one_epoch(model, criterion, optimizer, data_loader, device, epoch,
     metric_logger.add_meter('img/s', utils.SmoothedValue(window_size=10, fmt='{value}'))

     header = 'Epoch: [{}]'.format(epoch)
-    for image, target in metric_logger.log_every(data_loader, print_freq, header):
+    for i, (image, target) in enumerate(metric_logger.log_every(data_loader, print_freq, header)):
         start_time = time.time()
         image, target = image.to(device), target.to(device)
         output = model(image)
@@ -48,8 +48,8 @@ def train_one_epoch(model, criterion, optimizer, data_loader, device, epoch,
         metric_logger.meters['acc5'].update(acc5.item(), n=batch_size)
         metric_logger.meters['img/s'].update(batch_size / (time.time() - start_time))

-    if model_ema:
-        model_ema.update_parameters(model)
+    if model_ema and i % model_ema.update_steps == 0:
+        model_ema.update_parameters(model)


 def evaluate(model, criterion, data_loader, device, print_freq=100, log_suffix=''):
@@ -248,6 +248,7 @@ def main(args):
     model_ema = None
     if args.model_ema:
         model_ema = utils.ExponentialMovingAverage(model_without_ddp, device=device, decay=args.model_ema_decay)
+        model_ema.update_steps = args.model_ema_steps

     if args.resume:
         checkpoint = torch.load(args.resume, map_location='cpu')
@@ -372,9 +373,12 @@ def get_args_parser(add_help=True):
     parser.add_argument(
         '--model-ema', action='store_true',
         help='enable tracking Exponential Moving Average of model parameters')
+    parser.add_argument(
+        '--model-ema-steps', type=float, default=32,
+        help='the number of iterations that controls how often to update the EMA model (default: 32)')
     parser.add_argument(
         '--model-ema-decay', type=float, default=0.9,
-        help='decay factor for Exponential Moving Average of model parameters(default: 0.9)')
+        help='decay factor for Exponential Moving Average of model parameters (default: 0.9)')

     return parser

From 02b4d42e73e8aa54d43ea1698c86c8f0285f977c Mon Sep 17 00:00:00 2001
From: Vasilis Vryniotis
Date: Tue, 28 Sep 2021 13:03:05 +0100
Subject: [PATCH 02/17] Adding AdamW optimizer.

---
 references/classification/train.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/references/classification/train.py b/references/classification/train.py
index fed34721b77..89629f049a2 100644
--- a/references/classification/train.py
+++ b/references/classification/train.py
@@ -202,8 +202,10 @@ def main(args):
     elif opt_name == 'rmsprop':
         optimizer = torch.optim.RMSprop(model.parameters(), lr=args.lr, momentum=args.momentum,
                                         weight_decay=args.weight_decay, eps=0.0316, alpha=0.9)
+    elif opt_name == 'adamw':
+        optimizer = torch.optim.AdamW(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
     else:
-        raise RuntimeError("Invalid optimizer {}. Only SGD and RMSprop are supported.".format(args.opt))
+        raise RuntimeError(f"Invalid optimizer {args.opt}. Only SGD, RMSprop and AdamW are supported.")

     if args.apex:
         model, optimizer = amp.initialize(model, optimizer,

From 33a90f711045d4bf2f51f25964c705910fece1bb Mon Sep 17 00:00:00 2001
From: Vasilis Vryniotis
Date: Tue, 28 Sep 2021 13:55:16 +0100
Subject: [PATCH 03/17] Adjusting EMA decay scheme.

---
 references/classification/train.py | 29 ++++++++++++++++++---------
 1 file changed, 20 insertions(+), 9 deletions(-)

diff --git a/references/classification/train.py b/references/classification/train.py
index 89629f049a2..36d27fa40e6 100644
--- a/references/classification/train.py
+++ b/references/classification/train.py
@@ -20,7 +20,7 @@ def train_one_epoch(model, criterion, optimizer, data_loader, device, epoch,
-                    print_freq, apex=False, model_ema=None):
+                    print_freq, args, apex=False, model_ema=None):
     model.train()
     metric_logger = utils.MetricLogger(delimiter="  ")
     metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value}'))
@@ -41,6 +41,12 @@ def train_one_epoch(model, criterion, optimizer, data_loader, device, epoch,
             loss.backward()
             optimizer.step()

+        if model_ema and i % args.model_ema_steps == 0:
+            model_ema.update_parameters(model)
+            if epoch < args.lr_warmup_epochs:
+                # Reset ema buffer to keep copying weights during warmup period
+                model_ema.n_averaged.fill_(0)
+
         acc1, acc5 = utils.accuracy(output, target, topk=(1, 5))
         batch_size = image.shape[0]
         metric_logger.update(loss=loss.item(), lr=optimizer.param_groups[0]["lr"])
@@ -48,9 +54,6 @@ def train_one_epoch(model, criterion, optimizer, data_loader, device, epoch,
         metric_logger.meters['acc5'].update(acc5.item(), n=batch_size)
         metric_logger.meters['img/s'].update(batch_size / (time.time() - start_time))

-    if model_ema and i % model_ema.update_steps == 0:
-        model_ema.update_parameters(model)
-

 def evaluate(model, criterion, data_loader, device, print_freq=100, log_suffix=''):
     model.eval()
@@ -249,8 +252,16 @@ def main(args):

     model_ema = None
     if args.model_ema:
-        model_ema = utils.ExponentialMovingAverage(model_without_ddp, device=device, decay=args.model_ema_decay)
-        model_ema.update_steps = args.model_ema_steps
+        # Decay adjustment that aims to keep the decay independent from other hyper-parameters originally proposed at:
+        # https://github.com/facebookresearch/pycls/blob/f8cd9627/pycls/core/net.py#L123
+        #
+        # total_ema_updates = (Dataset_size / n_GPUs) * epochs / (batch_size * EMA_steps)
+        # We consider constant = (Dataset_size / n_GPUs) for a given dataset/setup and omit it.
Thus: + # adjust = 1 / total_ema_updates ~= batch_size * EMA_steps / epochs + adjust = args.batch_size * args.model_ema_steps / args.epochs + alpha = 1.0 - args.model_ema_decay + alpha = min(1.0, alpha * adjust) + model_ema = utils.ExponentialMovingAverage(model_without_ddp, device=device, decay=1.0 - alpha) if args.resume: checkpoint = torch.load(args.resume, map_location='cpu') @@ -376,11 +387,11 @@ def get_args_parser(add_help=True): '--model-ema', action='store_true', help='enable tracking Exponential Moving Average of model parameters') parser.add_argument( - '--model-ema-steps', type=float, default=32, + '--model-ema-steps', type=int, default=32, help='the number of iterations that controls how often to update the EMA model (default: 32)') parser.add_argument( - '--model-ema-decay', type=float, default=0.9, - help='decay factor for Exponential Moving Average of model parameters (default: 0.9)') + '--model-ema-decay', type=float, default=0.99999, + help='decay factor for Exponential Moving Average of model parameters (default: 0.99999)') return parser From cfdeede70716ee81242b76599d6d4a759b946fdd Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Tue, 28 Sep 2021 17:57:17 +0100 Subject: [PATCH 04/17] Support custom weight decay for Normalization layers. --- references/classification/train.py | 29 ++++++++++++++++++----------- test/test_ops.py | 14 ++++++++++++-- torchvision/ops/_utils.py | 28 ++++++++++++++++++++++++++-- 3 files changed, 56 insertions(+), 15 deletions(-) diff --git a/references/classification/train.py b/references/classification/train.py index 36d27fa40e6..4d93f073454 100644 --- a/references/classification/train.py +++ b/references/classification/train.py @@ -19,22 +19,21 @@ amp = None -def train_one_epoch(model, criterion, optimizer, data_loader, device, epoch, - print_freq, args, apex=False, model_ema=None): +def train_one_epoch(model, criterion, optimizer, data_loader, device, epoch, args, model_ema=None): model.train() metric_logger = utils.MetricLogger(delimiter=" ") metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value}')) metric_logger.add_meter('img/s', utils.SmoothedValue(window_size=10, fmt='{value}')) header = 'Epoch: [{}]'.format(epoch) - for i, (image, target) in enumerate(metric_logger.log_every(data_loader, print_freq, header)): + for i, (image, target) in enumerate(metric_logger.log_every(data_loader, args.print_freq, header)): start_time = time.time() image, target = image.to(device), target.to(device) output = model(image) loss = criterion(output, target) optimizer.zero_grad() - if apex: + if args.apex: with amp.scale_loss(loss, optimizer) as scaled_loss: scaled_loss.backward() else: @@ -197,16 +196,22 @@ def main(args): criterion = nn.CrossEntropyLoss(label_smoothing=args.label_smoothing) + if args.norm_weight_decay is None: + parameters = model.parameters() + else: + param_groups = torchvision.ops._utils.split_normalization_params(model) + wd_groups = [args.norm_weight_decay, args.weight_decay] + parameters = [{"params": p, "weight_decay": w} for p, w in zip(param_groups, wd_groups) if p] + opt_name = args.opt.lower() if opt_name.startswith("sgd"): - optimizer = torch.optim.SGD( - model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay, - nesterov="nesterov" in opt_name) + optimizer = torch.optim.SGD(parameters, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay, + nesterov="nesterov" in opt_name) elif opt_name == 'rmsprop': - optimizer = 
torch.optim.RMSprop(model.parameters(), lr=args.lr, momentum=args.momentum, - weight_decay=args.weight_decay, eps=0.0316, alpha=0.9) + optimizer = torch.optim.RMSprop(parameters, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay, + eps=0.0316, alpha=0.9) elif opt_name == 'adamw': - optimizer = torch.optim.AdamW(model.parameters(), lr=args.lr, weight_decay=args.weight_decay) + optimizer = torch.optim.AdamW(parameters, lr=args.lr, weight_decay=args.weight_decay) else: raise RuntimeError(f"Invalid optimizer {args.opt}. Only SGD, RMSprop and AdamW are supported.") @@ -281,7 +286,7 @@ def main(args): for epoch in range(args.start_epoch, args.epochs): if args.distributed: train_sampler.set_epoch(epoch) - train_one_epoch(model, criterion, optimizer, data_loader, device, epoch, args.print_freq, args.apex, model_ema) + train_one_epoch(model, criterion, optimizer, data_loader, device, epoch, args, model_ema) lr_scheduler.step() evaluate(model, criterion, data_loader_test, device=device) if model_ema: @@ -326,6 +331,8 @@ def get_args_parser(add_help=True): parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float, metavar='W', help='weight decay (default: 1e-4)', dest='weight_decay') + parser.add_argument('--norm-weight-decay', default=None, type=float, + help='weight decay for Normalization layers (default: None, same value as --wd)') parser.add_argument('--label-smoothing', default=0.0, type=float, help='label smoothing (default: 0.0)', dest='label_smoothing') diff --git a/test/test_ops.py b/test/test_ops.py index 8ab23f3ff64..36682adc6d3 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -9,10 +9,10 @@ from PIL import Image import torch from functools import lru_cache -from torch import Tensor +from torch import nn, Tensor from torch.autograd import gradcheck from torch.nn.modules.utils import _pair -from torchvision import ops +from torchvision import models, ops from typing import Tuple @@ -1062,5 +1062,15 @@ def test_stochastic_depth(self, mode, p): assert p_value > 0.0001 +class TestUtils: + @pytest.mark.parametrize('norm_layer', [None, nn.BatchNorm2d, nn.LayerNorm]) + def test_split_normalization_params(self, norm_layer): + model = models.mobilenet_v3_large(norm_layer=norm_layer) + params = ops._utils.split_normalization_params(model, None if norm_layer is None else [norm_layer]) + + assert len(params[0]) == 92 + assert len(params[1]) == 82 + + if __name__ == '__main__': pytest.main([__file__]) diff --git a/torchvision/ops/_utils.py b/torchvision/ops/_utils.py index 7cc6367a7a4..2cf47f31c72 100644 --- a/torchvision/ops/_utils.py +++ b/torchvision/ops/_utils.py @@ -1,6 +1,6 @@ import torch -from torch import Tensor -from typing import List, Union +from torch import nn, Tensor +from typing import List, Optional, Tuple, Union def _cat(tensors: List[Tensor], dim: int = 0) -> Tensor: @@ -34,3 +34,27 @@ def check_roi_boxes_shape(boxes: Union[Tensor, List[Tensor]]): else: assert False, 'boxes is expected to be a Tensor[L, 5] or a List[Tensor[K, 4]]' return + + +def split_normalization_params(model: nn.Module, + norm_classes: Optional[List[type]] = None) -> Tuple[List[Tensor], List[Tensor]]: + # Adapted from https://github.com/facebookresearch/ClassyVision/blob/659d7f78/classy_vision/generic/util.py#L501 + if not norm_classes: + norm_classes = [nn.modules.batchnorm._BatchNorm, nn.LayerNorm, nn.GroupNorm] + + for t in norm_classes: + if not issubclass(t, nn.Module): + raise ValueError(f"Class {t} is not a subclass of nn.Module.") + + classes = tuple(norm_classes) + + 
norm_params = []
+    other_params = []
+    for module in model.modules():
+        if next(module.children(), None):
+            other_params.extend(p for p in module.parameters(recurse=False) if p.requires_grad)
+        elif isinstance(module, classes):
+            norm_params.extend(p for p in module.parameters() if p.requires_grad)
+        else:
+            other_params.extend(p for p in module.parameters() if p.requires_grad)
+    return norm_params, other_params

From 7ecc6d8a62fc15c6074c014660edf893d04349f9 Mon Sep 17 00:00:00 2001
From: Vasilis Vryniotis
Date: Tue, 28 Sep 2021 18:49:26 +0100
Subject: [PATCH 05/17] Fix indentation bug.

---
 references/classification/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/references/classification/utils.py b/references/classification/utils.py
index fad607636e5..8df3da4b7a3 100644
--- a/references/classification/utils.py
+++ b/references/classification/utils.py
@@ -181,7 +181,7 @@ def update_parameters(self, model):
             else:
                 p_swa.detach().copy_(self.avg_fn(p_swa.detach(), p_model_,
                                                  self.n_averaged.to(device)))
-        self.n_averaged += 1
+            self.n_averaged += 1


 def accuracy(output, target, topk=(1,)):

From 0563f9e3b055791f85bfeab75658801a3961f387 Mon Sep 17 00:00:00 2001
From: Vasilis Vryniotis
Date: Wed, 29 Sep 2021 23:28:20 +0100
Subject: [PATCH 06/17] Change EMA adjustment.

---
 references/classification/train.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/references/classification/train.py b/references/classification/train.py
index 4d93f073454..40c174c35d7 100644
--- a/references/classification/train.py
+++ b/references/classification/train.py
@@ -260,10 +260,10 @@ def main(args):
         # Decay adjustment that aims to keep the decay independent from other hyper-parameters originally proposed at:
         # https://github.com/facebookresearch/pycls/blob/f8cd9627/pycls/core/net.py#L123
         #
-        # total_ema_updates = (Dataset_size / n_GPUs) * epochs / (batch_size * EMA_steps)
-        # We consider constant = (Dataset_size / n_GPUs) for a given dataset/setup and omit it. Thus:
-        # adjust = 1 / total_ema_updates ~= batch_size * EMA_steps / epochs
-        adjust = args.batch_size * args.model_ema_steps / args.epochs
+        # total_ema_updates = (Dataset_size / n_GPUs) * epochs / (batch_size_per_gpu * EMA_steps)
+        # We consider constant = Dataset_size for a given dataset/setup and omit it. Thus:
+        # adjust = 1 / total_ema_updates ~= n_GPUs * batch_size_per_gpu * EMA_steps / epochs
+        adjust = args.world_size * args.batch_size * args.model_ema_steps / args.epochs
         alpha = 1.0 - args.model_ema_decay
         alpha = min(1.0, alpha * adjust)
         model_ema = utils.ExponentialMovingAverage(model_without_ddp, device=device, decay=1.0 - alpha)
@@ -397,8 +397,8 @@ def get_args_parser(add_help=True):
         '--model-ema-steps', type=int, default=32,
         help='the number of iterations that controls how often to update the EMA model (default: 32)')
     parser.add_argument(
-        '--model-ema-decay', type=float, default=0.99999,
-        help='decay factor for Exponential Moving Average of model parameters (default: 0.99999)')
+        '--model-ema-decay', type=float, default=0.99998,
+        help='decay factor for Exponential Moving Average of model parameters (default: 0.99998)')

     return parser

From d188ee07546640f058cc95ce43ab0fff654eeb7c Mon Sep 17 00:00:00 2001
From: Vasilis Vryniotis
Date: Mon, 4 Oct 2021 12:23:36 +0100
Subject: [PATCH 07/17] Quality of life changes to facilitate testing

---
 references/classification/train.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/references/classification/train.py b/references/classification/train.py
index 40c174c35d7..151c3fecbdf 100644
--- a/references/classification/train.py
+++ b/references/classification/train.py
@@ -271,14 +271,18 @@ def main(args):
     if args.resume:
         checkpoint = torch.load(args.resume, map_location='cpu')
         model_without_ddp.load_state_dict(checkpoint['model'])
-        optimizer.load_state_dict(checkpoint['optimizer'])
-        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
+        if not args.test_only:
+            optimizer.load_state_dict(checkpoint['optimizer'])
+            lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
         args.start_epoch = checkpoint['epoch'] + 1
         if model_ema:
             model_ema.load_state_dict(checkpoint['model_ema'])

     if args.test_only:
-        evaluate(model, criterion, data_loader_test, device=device)
+        if model_ema:
+            evaluate(model_ema, criterion, data_loader_test, device=device, log_suffix='EMA')
+        else:
+            evaluate(model, criterion, data_loader_test, device=device)
         return

     print("Start training")

From 6655dac1e93c08c80ddfab3a92b56e2c2022849b Mon Sep 17 00:00:00 2001
From: Vasilis Vryniotis
Date: Tue, 5 Oct 2021 10:40:54 +0100
Subject: [PATCH 08/17] ufmt format

---
 references/classification/train.py | 128 +++++++++++++++++------------
 references/classification/utils.py | 3 +-
 test/test_ops.py | 8 +-
 torchvision/ops/_utils.py | 7 +-
 4 files changed, 84 insertions(+), 62 deletions(-)

diff --git a/references/classification/train.py b/references/classification/train.py
index 3e44aed3ad7..5f75b6a23e0 100644
--- a/references/classification/train.py
+++ b/references/classification/train.py
@@ -24,7 +24,7 @@ def train_one_epoch(model, criterion, optimizer, data_loader, device, epoch, arg
     metric_logger.add_meter("lr", utils.SmoothedValue(window_size=1, fmt="{value}"))
     metric_logger.add_meter("img/s", utils.SmoothedValue(window_size=10, fmt="{value}"))

-    header = 'Epoch: [{}]'.format(epoch)
+    header = "Epoch: [{}]".format(epoch)
     for i, (image, target) in enumerate(metric_logger.log_every(data_loader, args.print_freq, header)):
         start_time = time.time()
         image, target = image.to(device), target.to(device)
         output = model(image)
@@ -219,12 +219,18 @@ def main(args):
     opt_name = args.opt.lower()
     if opt_name.startswith("sgd"):
-        optimizer = torch.optim.SGD(parameters, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay,
-                                    nesterov="nesterov" in opt_name)
+        optimizer = torch.optim.SGD(
+            parameters,
+            lr=args.lr,
+            momentum=args.momentum,
+            weight_decay=args.weight_decay,
+            nesterov="nesterov" in opt_name,
+        )
-    elif opt_name ==
'rmsprop': - optimizer = torch.optim.RMSprop(parameters, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay, - eps=0.0316, alpha=0.9) - elif opt_name == 'adamw': + optimizer = torch.optim.SGD( + parameters, + lr=args.lr, + momentum=args.momentum, + weight_decay=args.weight_decay, + nesterov="nesterov" in opt_name, + ) + elif opt_name == "rmsprop": + optimizer = torch.optim.RMSprop( + parameters, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay, eps=0.0316, alpha=0.9 + ) + elif opt_name == "adamw": optimizer = torch.optim.AdamW(parameters, lr=args.lr, weight_decay=args.weight_decay) else: raise RuntimeError(f"Invalid optimizer {args.opt}. Only SGD, RMSprop and AdamW are supported.") @@ -285,18 +291,18 @@ def main(args): model_ema = utils.ExponentialMovingAverage(model_without_ddp, device=device, decay=1.0 - alpha) if args.resume: - checkpoint = torch.load(args.resume, map_location='cpu') - model_without_ddp.load_state_dict(checkpoint['model']) + checkpoint = torch.load(args.resume, map_location="cpu") + model_without_ddp.load_state_dict(checkpoint["model"]) if not args.test_only: - optimizer.load_state_dict(checkpoint['optimizer']) - lr_scheduler.load_state_dict(checkpoint['lr_scheduler']) - args.start_epoch = checkpoint['epoch'] + 1 + optimizer.load_state_dict(checkpoint["optimizer"]) + lr_scheduler.load_state_dict(checkpoint["lr_scheduler"]) + args.start_epoch = checkpoint["epoch"] + 1 if model_ema: model_ema.load_state_dict(checkpoint["model_ema"]) if args.test_only: if model_ema: - evaluate(model_ema, criterion, data_loader_test, device=device, log_suffix='EMA') + evaluate(model_ema, criterion, data_loader_test, device=device, log_suffix="EMA") else: evaluate(model, criterion, data_loader_test, device=device) return @@ -331,42 +337,52 @@ def main(args): def get_args_parser(add_help=True): import argparse - parser = argparse.ArgumentParser(description='PyTorch Classification Training', add_help=add_help) - - parser.add_argument('--data-path', default='/datasets01/imagenet_full_size/061417/', help='dataset') - parser.add_argument('--model', default='resnet18', help='model') - parser.add_argument('--device', default='cuda', help='device') - parser.add_argument('-b', '--batch-size', default=32, type=int) - parser.add_argument('--epochs', default=90, type=int, metavar='N', - help='number of total epochs to run') - parser.add_argument('-j', '--workers', default=16, type=int, metavar='N', - help='number of data loading workers (default: 16)') - parser.add_argument('--opt', default='sgd', type=str, help='optimizer') - parser.add_argument('--lr', default=0.1, type=float, help='initial learning rate') - parser.add_argument('--momentum', default=0.9, type=float, metavar='M', - help='momentum') - parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float, - metavar='W', help='weight decay (default: 1e-4)', - dest='weight_decay') - parser.add_argument('--norm-weight-decay', default=None, type=float, - help='weight decay for Normalization layers (default: None, same value as --wd)') - parser.add_argument('--label-smoothing', default=0.0, type=float, - help='label smoothing (default: 0.0)', - dest='label_smoothing') - parser.add_argument('--mixup-alpha', default=0.0, type=float, help='mixup alpha (default: 0.0)') - parser.add_argument('--cutmix-alpha', default=0.0, type=float, help='cutmix alpha (default: 0.0)') - parser.add_argument('--lr-scheduler', default="steplr", help='the lr scheduler (default: steplr)') - parser.add_argument('--lr-warmup-epochs', 
default=0, type=int, help='the number of epochs to warmup (default: 0)') - parser.add_argument('--lr-warmup-method', default="constant", type=str, - help='the warmup method (default: constant)') - parser.add_argument('--lr-warmup-decay', default=0.01, type=float, help='the decay for lr') - parser.add_argument('--lr-step-size', default=30, type=int, help='decrease lr every step-size epochs') - parser.add_argument('--lr-gamma', default=0.1, type=float, help='decrease lr by a factor of lr-gamma') - parser.add_argument('--print-freq', default=10, type=int, help='print frequency') - parser.add_argument('--output-dir', default='.', help='path where to save') - parser.add_argument('--resume', default='', help='resume from checkpoint') - parser.add_argument('--start-epoch', default=0, type=int, metavar='N', - help='start epoch') + + parser = argparse.ArgumentParser(description="PyTorch Classification Training", add_help=add_help) + + parser.add_argument("--data-path", default="/datasets01/imagenet_full_size/061417/", help="dataset") + parser.add_argument("--model", default="resnet18", help="model") + parser.add_argument("--device", default="cuda", help="device") + parser.add_argument("-b", "--batch-size", default=32, type=int) + parser.add_argument("--epochs", default=90, type=int, metavar="N", help="number of total epochs to run") + parser.add_argument( + "-j", "--workers", default=16, type=int, metavar="N", help="number of data loading workers (default: 16)" + ) + parser.add_argument("--opt", default="sgd", type=str, help="optimizer") + parser.add_argument("--lr", default=0.1, type=float, help="initial learning rate") + parser.add_argument("--momentum", default=0.9, type=float, metavar="M", help="momentum") + parser.add_argument( + "--wd", + "--weight-decay", + default=1e-4, + type=float, + metavar="W", + help="weight decay (default: 1e-4)", + dest="weight_decay", + ) + parser.add_argument( + "--norm-weight-decay", + default=None, + type=float, + help="weight decay for Normalization layers (default: None, same value as --wd)", + ) + parser.add_argument( + "--label-smoothing", default=0.0, type=float, help="label smoothing (default: 0.0)", dest="label_smoothing" + ) + parser.add_argument("--mixup-alpha", default=0.0, type=float, help="mixup alpha (default: 0.0)") + parser.add_argument("--cutmix-alpha", default=0.0, type=float, help="cutmix alpha (default: 0.0)") + parser.add_argument("--lr-scheduler", default="steplr", help="the lr scheduler (default: steplr)") + parser.add_argument("--lr-warmup-epochs", default=0, type=int, help="the number of epochs to warmup (default: 0)") + parser.add_argument( + "--lr-warmup-method", default="constant", type=str, help="the warmup method (default: constant)" + ) + parser.add_argument("--lr-warmup-decay", default=0.01, type=float, help="the decay for lr") + parser.add_argument("--lr-step-size", default=30, type=int, help="decrease lr every step-size epochs") + parser.add_argument("--lr-gamma", default=0.1, type=float, help="decrease lr by a factor of lr-gamma") + parser.add_argument("--print-freq", default=10, type=int, help="print frequency") + parser.add_argument("--output-dir", default=".", help="path where to save") + parser.add_argument("--resume", default="", help="resume from checkpoint") + parser.add_argument("--start-epoch", default=0, type=int, metavar="N", help="start epoch") parser.add_argument( "--cache-dataset", dest="cache_dataset", @@ -412,11 +428,17 @@ def get_args_parser(add_help=True): "--model-ema", action="store_true", help="enable 
tracking Exponential Moving Average of model parameters" ) parser.add_argument( - '--model-ema-steps', type=int, default=32, - help='the number of iterations that controls how often to update the EMA model (default: 32)') + "--model-ema-steps", + type=int, + default=32, + help="the number of iterations that controls how often to update the EMA model (default: 32)", + ) parser.add_argument( - '--model-ema-decay', type=float, default=0.99998, - help='decay factor for Exponential Moving Average of model parameters (default: 0.99998)') + "--model-ema-decay", + type=float, + default=0.99998, + help="decay factor for Exponential Moving Average of model parameters (default: 0.99998)", + ) return parser diff --git a/references/classification/utils.py b/references/classification/utils.py index f9a6951a12f..5dbb6b8fd24 100644 --- a/references/classification/utils.py +++ b/references/classification/utils.py @@ -179,8 +179,7 @@ def update_parameters(self, model): if self.n_averaged == 0: p_swa.detach().copy_(p_model_) else: - p_swa.detach().copy_(self.avg_fn(p_swa.detach(), p_model_, - self.n_averaged.to(device))) + p_swa.detach().copy_(self.avg_fn(p_swa.detach(), p_model_, self.n_averaged.to(device))) self.n_averaged += 1 diff --git a/test/test_ops.py b/test/test_ops.py index feeff1e0ffe..78f04e5450a 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -2,17 +2,17 @@ import os from abc import ABC, abstractmethod from functools import lru_cache +from functools import lru_cache +from typing import Tuple from typing import Tuple import numpy as np import pytest import torch -from functools import lru_cache from torch import nn, Tensor from torch.autograd import gradcheck from torch.nn.modules.utils import _pair from torchvision import models, ops -from typing import Tuple class RoIOpTester(ABC): @@ -1177,7 +1177,7 @@ def test_stochastic_depth(self, mode, p): class TestUtils: - @pytest.mark.parametrize('norm_layer', [None, nn.BatchNorm2d, nn.LayerNorm]) + @pytest.mark.parametrize("norm_layer", [None, nn.BatchNorm2d, nn.LayerNorm]) def test_split_normalization_params(self, norm_layer): model = models.mobilenet_v3_large(norm_layer=norm_layer) params = ops._utils.split_normalization_params(model, None if norm_layer is None else [norm_layer]) @@ -1186,5 +1186,5 @@ def test_split_normalization_params(self, norm_layer): assert len(params[1]) == 82 -if __name__ == '__main__': +if __name__ == "__main__": pytest.main([__file__]) diff --git a/torchvision/ops/_utils.py b/torchvision/ops/_utils.py index 5615e4ad247..5ad9b7018df 100644 --- a/torchvision/ops/_utils.py +++ b/torchvision/ops/_utils.py @@ -1,8 +1,8 @@ from typing import List, Union +from typing import List, Optional, Tuple, Union import torch from torch import nn, Tensor -from typing import List, Optional, Tuple, Union def _cat(tensors: List[Tensor], dim: int = 0) -> Tensor: @@ -39,8 +39,9 @@ def check_roi_boxes_shape(boxes: Union[Tensor, List[Tensor]]): return -def split_normalization_params(model: nn.Module, - norm_classes: Optional[List[type]] = None) -> Tuple[List[Tensor], List[Tensor]]: +def split_normalization_params( + model: nn.Module, norm_classes: Optional[List[type]] = None +) -> Tuple[List[Tensor], List[Tensor]]: # Adapted from https://github.com/facebookresearch/ClassyVision/blob/659d7f78/classy_vision/generic/util.py#L501 if not norm_classes: norm_classes = [nn.modules.batchnorm._BatchNorm, nn.LayerNorm, nn.GroupNorm] From dc0edb9046b358393a61f71e48c743f5aeb22dc7 Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Tue, 5 Oct 2021 
10:49:57 +0100 Subject: [PATCH 09/17] Fixing imports. --- test/test_ops.py | 4 ++-- torchvision/ops/_utils.py | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/test/test_ops.py b/test/test_ops.py index 78f04e5450a..892496dffca 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -2,13 +2,13 @@ import os from abc import ABC, abstractmethod from functools import lru_cache -from functools import lru_cache -from typing import Tuple from typing import Tuple import numpy as np import pytest import torch +from common_utils import needs_cuda, cpu_and_gpu, assert_equal +from PIL import Image from torch import nn, Tensor from torch.autograd import gradcheck from torch.nn.modules.utils import _pair diff --git a/torchvision/ops/_utils.py b/torchvision/ops/_utils.py index 5ad9b7018df..3a07c747f58 100644 --- a/torchvision/ops/_utils.py +++ b/torchvision/ops/_utils.py @@ -1,4 +1,3 @@ -from typing import List, Union from typing import List, Optional, Tuple, Union import torch From 2e93296b32367e00f8c0019fba7ad63864ae45c6 Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Fri, 8 Oct 2021 11:16:22 +0100 Subject: [PATCH 10/17] Adding FixRes improvement. --- references/classification/presets.py | 3 ++- references/classification/train.py | 33 +++++++++++++++++----------- 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/references/classification/presets.py b/references/classification/presets.py index 27ce486207d..3128413ad65 100644 --- a/references/classification/presets.py +++ b/references/classification/presets.py @@ -9,11 +9,12 @@ def __init__( crop_size, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), + interpolation=InterpolationMode.BILINEAR, hflip_prob=0.5, auto_augment_policy=None, random_erase_prob=0.0, ): - trans = [transforms.RandomResizedCrop(crop_size)] + trans = [transforms.RandomResizedCrop(crop_size, interpolation=interpolation)] if hflip_prob > 0: trans.append(transforms.RandomHorizontalFlip(hflip_prob)) if auto_augment_policy is not None: diff --git a/references/classification/train.py b/references/classification/train.py index 521317c905b..34351cd8d72 100644 --- a/references/classification/train.py +++ b/references/classification/train.py @@ -88,23 +88,25 @@ def _get_cache_path(filepath): def load_data(traindir, valdir, args): # Data loading code print("Loading data") - resize_size, crop_size = 256, 224 + val_resize_size, val_crop_size, train_crop_size = 256, 224, 224 interpolation = InterpolationMode.BILINEAR if args.model == "inception_v3": - resize_size, crop_size = 342, 299 + val_resize_size, val_crop_size, train_crop_size = 342, 299, 299 + elif args.model == "resnet50": + val_resize_size, val_crop_size, train_crop_size = 256, 224, 192 elif args.model.startswith("efficientnet_"): sizes = { - "b0": (256, 224), - "b1": (256, 240), - "b2": (288, 288), - "b3": (320, 300), - "b4": (384, 380), - "b5": (456, 456), - "b6": (528, 528), - "b7": (600, 600), + "b0": (256, 224, 224), + "b1": (256, 240, 240), + "b2": (288, 288, 288), + "b3": (320, 300, 300), + "b4": (384, 380, 380), + "b5": (456, 456, 456), + "b6": (528, 528, 528), + "b7": (600, 600, 600), } e_type = args.model.replace("efficientnet_", "") - resize_size, crop_size = sizes[e_type] + val_resize_size, val_crop_size, train_crop_size = sizes[e_type] interpolation = InterpolationMode.BICUBIC print("Loading training data") @@ -120,7 +122,10 @@ def load_data(traindir, valdir, args): dataset = torchvision.datasets.ImageFolder( traindir, presets.ClassificationPresetTrain( - crop_size=crop_size, 
auto_augment_policy=auto_augment_policy, random_erase_prob=random_erase_prob + crop_size=train_crop_size, + interpolation=interpolation, + auto_augment_policy=auto_augment_policy, + random_erase_prob=random_erase_prob, ), ) if args.cache_dataset: @@ -138,7 +143,9 @@ def load_data(traindir, valdir, args): else: dataset_test = torchvision.datasets.ImageFolder( valdir, - presets.ClassificationPresetEval(crop_size=crop_size, resize_size=resize_size, interpolation=interpolation), + presets.ClassificationPresetEval( + crop_size=val_crop_size, resize_size=val_resize_size, interpolation=interpolation + ), ) if args.cache_dataset: print("Saving dataset_test to {}".format(cache_path)) From 6859fa278f90e68fff4f492144bffe45e748cba3 Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Wed, 13 Oct 2021 14:07:27 +0100 Subject: [PATCH 11/17] Support EMA in store_model_weights. --- references/classification/utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/references/classification/utils.py b/references/classification/utils.py index 5dbb6b8fd24..32da69d2a57 100644 --- a/references/classification/utils.py +++ b/references/classification/utils.py @@ -384,6 +384,9 @@ def store_model_weights(model, checkpoint_path, checkpoint_key="model", strict=T # Load the weights to the model to validate that everything works # and remove unnecessary weights (such as auxiliaries, etc) + if checkpoint_key == "model_ema": + del checkpoint[checkpoint_key]["n_averaged"] + torch.nn.modules.utils.consume_prefix_in_state_dict_if_present(checkpoint[checkpoint_key], "module.") model.load_state_dict(checkpoint[checkpoint_key], strict=strict) tmp_path = os.path.join(output_dir, str(model.__hash__())) From 950636e2fb0f24cd1ee4c42eadc2b6c4aee2d4be Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Fri, 15 Oct 2021 11:23:13 +0100 Subject: [PATCH 12/17] Adding interpolation values. --- references/classification/presets.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/references/classification/presets.py b/references/classification/presets.py index 3128413ad65..6e1000174ab 100644 --- a/references/classification/presets.py +++ b/references/classification/presets.py @@ -19,12 +19,12 @@ def __init__( trans.append(transforms.RandomHorizontalFlip(hflip_prob)) if auto_augment_policy is not None: if auto_augment_policy == "ra": - trans.append(autoaugment.RandAugment()) + trans.append(autoaugment.RandAugment(interpolation=interpolation)) elif auto_augment_policy == "ta_wide": - trans.append(autoaugment.TrivialAugmentWide()) + trans.append(autoaugment.TrivialAugmentWide(interpolation=interpolation)) else: aa_policy = autoaugment.AutoAugmentPolicy(auto_augment_policy) - trans.append(autoaugment.AutoAugment(policy=aa_policy)) + trans.append(autoaugment.AutoAugment(policy=aa_policy, interpolation=interpolation)) trans.extend( [ transforms.PILToTensor(), From 9a6a443e1f4a2fa8e20663492d3eb1b2078f7da5 Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Sun, 17 Oct 2021 09:37:55 +0100 Subject: [PATCH 13/17] Change train_crop_size. 
--- references/classification/train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/references/classification/train.py b/references/classification/train.py index 59d7049e269..5707e720e51 100644 --- a/references/classification/train.py +++ b/references/classification/train.py @@ -112,7 +112,7 @@ def load_data(traindir, valdir, args): if args.model == "inception_v3": val_resize_size, val_crop_size, train_crop_size = 342, 299, 299 elif args.model == "resnet50": - val_resize_size, val_crop_size, train_crop_size = 256, 224, 192 + val_resize_size, val_crop_size, train_crop_size = 256, 224, 176 elif args.model.startswith("efficientnet_"): sizes = { "b0": (256, 224, 224), From e699eca520ba2c9915f10ba970360ea642ed19c9 Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Sun, 17 Oct 2021 10:19:09 +0100 Subject: [PATCH 14/17] Add interpolation option. --- references/classification/train.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/references/classification/train.py b/references/classification/train.py index 5707e720e51..80c06698c09 100644 --- a/references/classification/train.py +++ b/references/classification/train.py @@ -108,7 +108,7 @@ def load_data(traindir, valdir, args): # Data loading code print("Loading data") val_resize_size, val_crop_size, train_crop_size = 256, 224, 224 - interpolation = InterpolationMode.BILINEAR + interpolation = InterpolationMode(args.interpolation) if args.model == "inception_v3": val_resize_size, val_crop_size, train_crop_size = 342, 299, 299 elif args.model == "resnet50": @@ -458,6 +458,7 @@ def get_args_parser(add_help=True): parser.add_argument( "--use-deterministic-algorithms", action="store_true", help="Forces the use of deterministic algorithms only." ) + parser.add_argument("--interpolation", default="bilinear", help="the default interpolation (default: bilinear)") return parser From 9ee69c4202cd759bd338931272ccda72bc878834 Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Thu, 21 Oct 2021 11:04:41 +0100 Subject: [PATCH 15/17] Removing hardcoded interpolation and sizes from the scripts. --- references/classification/README.md | 35 +++++++++++++++++++ references/classification/train.py | 28 +++++---------- .../classification/train_quantization.py | 8 +++++ 3 files changed, 51 insertions(+), 20 deletions(-) diff --git a/references/classification/README.md b/references/classification/README.md index bae563c31c5..52ad81738ba 100644 --- a/references/classification/README.md +++ b/references/classification/README.md @@ -31,6 +31,17 @@ Here `$MODEL` is one of `alexnet`, `vgg11`, `vgg13`, `vgg16` or `vgg19`. Note that `vgg11_bn`, `vgg13_bn`, `vgg16_bn`, and `vgg19_bn` include batch normalization and thus are trained with the default parameters. +### Inception V3 + +The weights of the Inception V3 model are ported from the original paper rather than trained from scratch. 
+ +Since it expects tensors with a size of N x 3 x 299 x 299, to validate the model use the following command: + +``` +torchrun --nproc_per_node=8 train.py --model inception_v3 + --val-resize-size 342 --val-crop-size 299 --train-crop-size 299 --test-only --pretrained +``` + ### ResNext-50 32x4d ``` torchrun --nproc_per_node=8 train.py\ @@ -79,6 +90,25 @@ The weights of the B0-B4 variants are ported from Ross Wightman's [timm repo](ht The weights of the B5-B7 variants are ported from Luke Melas' [EfficientNet-PyTorch repo](https://github.com/lukemelas/EfficientNet-PyTorch/blob/1039e009545d9329ea026c9f7541341439712b96/efficientnet_pytorch/utils.py#L562-L564). +All models were trained using Bicubic interpolation and each have custom crop and resize sizes. To validate the models use the following commands: +``` +torchrun --nproc_per_node=8 train.py --model efficientnet_b0 --interpolation bicubic\ + --val-resize-size 256 --val-crop-size 224 --train-crop-size 224 --test-only --pretrained +torchrun --nproc_per_node=8 train.py --model efficientnet_b1 --interpolation bicubic\ + --val-resize-size 256 --val-crop-size 240 --train-crop-size 240 --test-only --pretrained +torchrun --nproc_per_node=8 train.py --model efficientnet_b2 --interpolation bicubic\ + --val-resize-size 288 --val-crop-size 288 --train-crop-size 288 --test-only --pretrained +torchrun --nproc_per_node=8 train.py --model efficientnet_b3 --interpolation bicubic\ + --val-resize-size 320 --val-crop-size 300 --train-crop-size 300 --test-only --pretrained +torchrun --nproc_per_node=8 train.py --model efficientnet_b4 --interpolation bicubic\ + --val-resize-size 384 --val-crop-size 380 --train-crop-size 380 --test-only --pretrained +torchrun --nproc_per_node=8 train.py --model efficientnet_b5 --interpolation bicubic\ + --val-resize-size 456 --val-crop-size 456 --train-crop-size 456 --test-only --pretrained +torchrun --nproc_per_node=8 train.py --model efficientnet_b6 --interpolation bicubic\ + --val-resize-size 528 --val-crop-size 528 --train-crop-size 528 --test-only --pretrained +torchrun --nproc_per_node=8 train.py --model efficientnet_b7 --interpolation bicubic\ + --val-resize-size 600 --val-crop-size 600 --train-crop-size 600 --test-only --pretrained +``` ### RegNet @@ -181,3 +211,8 @@ For post training quant, device is set to CPU. 
For training, the device is set t ``` python train_quantization.py --device='cpu' --test-only --backend='' --model='' ``` + +For inception_v3 you need to pass the following extra parameters: +``` +--val-resize-size 342 --val-crop-size 299 --train-crop-size 299 +``` \ No newline at end of file diff --git a/references/classification/train.py b/references/classification/train.py index 05e6496f9e3..c6bb615e001 100644 --- a/references/classification/train.py +++ b/references/classification/train.py @@ -107,26 +107,8 @@ def _get_cache_path(filepath): def load_data(traindir, valdir, args): # Data loading code print("Loading data") - val_resize_size, val_crop_size, train_crop_size = 256, 224, 224 + val_resize_size, val_crop_size, train_crop_size = args.val_resize_size, args.val_crop_size, args.train_crop_size interpolation = InterpolationMode(args.interpolation) - if args.model == "inception_v3": - val_resize_size, val_crop_size, train_crop_size = 342, 299, 299 - elif args.model == "resnet50": - val_resize_size, val_crop_size, train_crop_size = 256, 224, 176 - elif args.model.startswith("efficientnet_"): - sizes = { - "b0": (256, 224, 224), - "b1": (256, 240, 240), - "b2": (288, 288, 288), - "b3": (320, 300, 300), - "b4": (384, 380, 380), - "b5": (456, 456, 456), - "b6": (528, 528, 528), - "b7": (600, 600, 600), - } - e_type = args.model.replace("efficientnet_", "") - val_resize_size, val_crop_size, train_crop_size = sizes[e_type] - interpolation = InterpolationMode.BICUBIC print("Loading training data") st = time.time() @@ -458,7 +440,13 @@ def get_args_parser(add_help=True): parser.add_argument( "--use-deterministic-algorithms", action="store_true", help="Forces the use of deterministic algorithms only." ) - parser.add_argument("--interpolation", default="bilinear", help="the default interpolation (default: bilinear)") + parser.add_argument("--interpolation", default="bilinear", help="the interpolation method (default: bilinear)") + parser.add_argument("--val-resize-size", default=256, type=int, + help="the resize size used for validation (default: 256)") + parser.add_argument("--val-crop-size", default=224, type=int, + help="the central crop size used for validation (default: 224)") + parser.add_argument("--train-crop-size", default=224, type=int, + help="the random crop size used for training (default: 224)") return parser diff --git a/references/classification/train_quantization.py b/references/classification/train_quantization.py index ae4e81b0133..e6124b364f9 100644 --- a/references/classification/train_quantization.py +++ b/references/classification/train_quantization.py @@ -236,6 +236,14 @@ def get_args_parser(add_help=True): parser.add_argument("--world-size", default=1, type=int, help="number of distributed processes") parser.add_argument("--dist-url", default="env://", help="url used to set up distributed training") + parser.add_argument("--interpolation", default="bilinear", help="the interpolation method (default: bilinear)") + parser.add_argument("--val-resize-size", default=256, type=int, + help="the resize size used for validation (default: 256)") + parser.add_argument("--val-crop-size", default=224, type=int, + help="the central crop size used for validation (default: 224)") + parser.add_argument("--train-crop-size", default=224, type=int, + help="the random crop size used for training (default: 224)") + return parser From bc5a2bdaf974aeb55e01ac204d23e86cc3f1a99e Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Thu, 21 Oct 2021 11:24:19 +0100 Subject: [PATCH 16/17] Fixing 
linter. --- references/classification/README.md | 2 +- references/classification/train.py | 15 +++++++++------ references/classification/train_quantization.py | 15 +++++++++------ 3 files changed, 19 insertions(+), 13 deletions(-) diff --git a/references/classification/README.md b/references/classification/README.md index 52ad81738ba..006e9c398b1 100644 --- a/references/classification/README.md +++ b/references/classification/README.md @@ -215,4 +215,4 @@ python train_quantization.py --device='cpu' --test-only --backend='' -- For inception_v3 you need to pass the following extra parameters: ``` --val-resize-size 342 --val-crop-size 299 --train-crop-size 299 -``` \ No newline at end of file +``` diff --git a/references/classification/train.py b/references/classification/train.py index c6bb615e001..fd3e65a5c42 100644 --- a/references/classification/train.py +++ b/references/classification/train.py @@ -441,12 +441,15 @@ def get_args_parser(add_help=True): "--use-deterministic-algorithms", action="store_true", help="Forces the use of deterministic algorithms only." ) parser.add_argument("--interpolation", default="bilinear", help="the interpolation method (default: bilinear)") - parser.add_argument("--val-resize-size", default=256, type=int, - help="the resize size used for validation (default: 256)") - parser.add_argument("--val-crop-size", default=224, type=int, - help="the central crop size used for validation (default: 224)") - parser.add_argument("--train-crop-size", default=224, type=int, - help="the random crop size used for training (default: 224)") + parser.add_argument( + "--val-resize-size", default=256, type=int, help="the resize size used for validation (default: 256)" + ) + parser.add_argument( + "--val-crop-size", default=224, type=int, help="the central crop size used for validation (default: 224)" + ) + parser.add_argument( + "--train-crop-size", default=224, type=int, help="the random crop size used for training (default: 224)" + ) return parser diff --git a/references/classification/train_quantization.py b/references/classification/train_quantization.py index e6124b364f9..b6f52c71632 100644 --- a/references/classification/train_quantization.py +++ b/references/classification/train_quantization.py @@ -237,12 +237,15 @@ def get_args_parser(add_help=True): parser.add_argument("--dist-url", default="env://", help="url used to set up distributed training") parser.add_argument("--interpolation", default="bilinear", help="the interpolation method (default: bilinear)") - parser.add_argument("--val-resize-size", default=256, type=int, - help="the resize size used for validation (default: 256)") - parser.add_argument("--val-crop-size", default=224, type=int, - help="the central crop size used for validation (default: 224)") - parser.add_argument("--train-crop-size", default=224, type=int, - help="the random crop size used for training (default: 224)") + parser.add_argument( + "--val-resize-size", default=256, type=int, help="the resize size used for validation (default: 256)" + ) + parser.add_argument( + "--val-crop-size", default=224, type=int, help="the central crop size used for validation (default: 224)" + ) + parser.add_argument( + "--train-crop-size", default=224, type=int, help="the random crop size used for training (default: 224)" + ) return parser From 14a3323b97bbeb947e7ed280e2c932551739d0e3 Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Thu, 21 Oct 2021 18:13:12 +0100 Subject: [PATCH 17/17] Incorporating feedback from code review. 
--- references/classification/train.py | 4 +++- references/classification/train_quantization.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/references/classification/train.py b/references/classification/train.py index fd3e65a5c42..bae6adea63a 100644 --- a/references/classification/train.py +++ b/references/classification/train.py @@ -440,7 +440,9 @@ def get_args_parser(add_help=True): parser.add_argument( "--use-deterministic-algorithms", action="store_true", help="Forces the use of deterministic algorithms only." ) - parser.add_argument("--interpolation", default="bilinear", help="the interpolation method (default: bilinear)") + parser.add_argument( + "--interpolation", default="bilinear", type=str, help="the interpolation method (default: bilinear)" + ) parser.add_argument( "--val-resize-size", default=256, type=int, help="the resize size used for validation (default: 256)" ) diff --git a/references/classification/train_quantization.py b/references/classification/train_quantization.py index b6f52c71632..f384be76a62 100644 --- a/references/classification/train_quantization.py +++ b/references/classification/train_quantization.py @@ -236,7 +236,9 @@ def get_args_parser(add_help=True): parser.add_argument("--world-size", default=1, type=int, help="number of distributed processes") parser.add_argument("--dist-url", default="env://", help="url used to set up distributed training") - parser.add_argument("--interpolation", default="bilinear", help="the interpolation method (default: bilinear)") + parser.add_argument( + "--interpolation", default="bilinear", type=str, help="the interpolation method (default: bilinear)" + ) parser.add_argument( "--val-resize-size", default=256, type=int, help="the resize size used for validation (default: 256)" )