diff --git a/models/common.py b/models/common.py
index 526d8578..36603230 100644
--- a/models/common.py
+++ b/models/common.py
@@ -36,11 +36,14 @@ def autopad(k, p=None):  # kernel, padding
 
 
 class Conv(nn.Module):
     # Standard convolution
+    default_act = nn.SiLU()  # default activation
+
     def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
         super().__init__()
         self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
         self.bn = nn.BatchNorm2d(c2)
-        self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity())
+        self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity()
+        self.is_fuse = False
 
     def forward(self, x):
diff --git a/models/yolo.py b/models/yolo.py
index eea2cdb0..80f9129b 100644
--- a/models/yolo.py
+++ b/models/yolo.py
@@ -21,7 +21,7 @@
 from models.common import C3, C3SPP, C3TR, SPP, SPPF, Bottleneck, BottleneckCSP, C3Ghost, C3x, Concat, Contract, Conv, CrossConv, DWConv, DWConvTranspose2d, Expand, Focus, GhostBottleneck, GhostConv
 from models.experimental import MixConv2d
 from utils.autoanchor import check_anchor_order
-from utils.general import LOGGER, check_yaml, make_divisible, print_args
+from utils.general import LOGGER, check_yaml, colorstr, make_divisible, print_args
 from utils.oneflow_utils import fuse_conv_and_bn, initialize_weights, model_info, profile, scale_img, select_device, time_sync
 from utils.plots import feature_visualization
 
@@ -271,7 +271,10 @@ def _apply(self, fn):
 
 
 def parse_model(d, ch):  # model_dict, input_channels(3)
     LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10}  {'module':<40}{'arguments':<30}")
-    anchors, nc, gd, gw = d["anchors"], d["nc"], d["depth_multiple"], d["width_multiple"]
+    anchors, nc, gd, gw, act = d["anchors"], d["nc"], d["depth_multiple"], d["width_multiple"], d.get("activation")
+    if act:
+        Conv.default_act = eval(act)  # redefine default activation, i.e. Conv.default_act = nn.SiLU()
+        LOGGER.info(f"{colorstr('activation:')} {act}")  # print
     na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors  # number of anchors
     no = na * (nc + 5)  # number of outputs = anchors * (classes + 5)
diff --git a/train.py b/train.py
index 20244ad4..a539f5f1 100644
--- a/train.py
+++ b/train.py
@@ -125,7 +125,7 @@ def train(hyp, opt, device, callbacks):  # hyp is path/to/hyp.yaml or hyp dictio
 
     plots = not evolve and not opt.noplots  # create plots
     cuda = device.type != "cpu"
-    init_seeds(1, deterministic=True)
+    init_seeds(opt.seed + 1 + RANK, deterministic=True)
     # with torch_distributed_zero_first(LOCAL_RANK):  # 这个是上下文管理器
     data_dict = data_dict or check_dataset(data)  # check if None
 
diff --git a/utils/dataloaders.py b/utils/dataloaders.py
index d7b557a9..ef5db500 100755
--- a/utils/dataloaders.py
+++ b/utils/dataloaders.py
@@ -76,7 +76,7 @@
 )  # include video suffixes
 BAR_FORMAT = "{l_bar}{bar:10}{r_bar}{bar:-10b}"  # tqdm bar format
 LOCAL_RANK = int(os.getenv("LOCAL_RANK", -1))  # https://pytorch.org/docs/stable/elastic/run.html
-
+RANK = int(os.getenv("RANK", -1))
 # Get orientation exif tag
 for orientation in ExifTags.TAGS.keys():
     if ExifTags.TAGS[orientation] == "Orientation":
@@ -178,7 +178,7 @@ def create_dataloader(
     sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle)
     loader = DataLoader if image_weights else InfiniteDataLoader  # only DataLoader allows for attribute updates
     generator = flow.Generator()
-    generator.manual_seed(0)
+    generator.manual_seed(6148914691236517205 + RANK)
     return (
         loader(
             dataset,