merge pull request kohya-ss#1000; fixed

jihnenglin · Dec 13, 2023 · c146c92
1 parent 8e1d4f3
commit c146c92
Showing 1 changed file with 38 additions and 10 deletions.
diff --git a/library/train_util.py b/library/train_util.py
@@ -351,6 +351,7 @@ def __init__(
         shuffle_caption: bool,
         caption_separator: str,
         keep_tokens: int,
+        keep_tokens_separator: str,
         color_aug: bool,
         flip_aug: bool,
         face_crop_aug_range: Optional[Tuple[float, float]],
@@ -368,6 +369,7 @@ def __init__(
         self.shuffle_caption = shuffle_caption
         self.caption_separator = caption_separator
         self.keep_tokens = keep_tokens
+        self.keep_tokens_separator = keep_tokens_separator
         self.color_aug = color_aug
         self.flip_aug = flip_aug
         self.face_crop_aug_range = face_crop_aug_range
@@ -395,6 +397,7 @@ def __init__(
         shuffle_caption,
         caption_separator: str,
         keep_tokens,
+        keep_tokens_separator,
         color_aug,
         flip_aug,
         face_crop_aug_range,
@@ -415,6 +418,7 @@ def __init__(
             shuffle_caption,
             caption_separator,
             keep_tokens,
+            keep_tokens_separator,
             color_aug,
             flip_aug,
             face_crop_aug_range,
@@ -449,6 +453,7 @@ def __init__(
         shuffle_caption,
         caption_separator,
         keep_tokens,
+        keep_tokens_separator,
         color_aug,
         flip_aug,
         face_crop_aug_range,
@@ -469,6 +474,7 @@ def __init__(
             shuffle_caption,
             caption_separator,
             keep_tokens,
+            keep_tokens_separator,
             color_aug,
             flip_aug,
             face_crop_aug_range,
@@ -500,6 +506,7 @@ def __init__(
         shuffle_caption,
         caption_separator,
         keep_tokens,
+        keep_tokens_separator,
         color_aug,
         flip_aug,
         face_crop_aug_range,
@@ -520,6 +527,7 @@ def __init__(
             shuffle_caption,
             caption_separator,
             keep_tokens,
+            keep_tokens_separator,
             color_aug,
             flip_aug,
             face_crop_aug_range,
@@ -654,15 +662,33 @@ def process_caption(self, subset: BaseSubset, caption):
             caption = ""
         else:
             if subset.shuffle_caption or subset.token_warmup_step > 0 or subset.caption_tag_dropout_rate > 0:
-                tokens = [t.strip() for t in caption.strip().split(subset.caption_separator)]
+                fixed_tokens = []
+                flex_tokens = []
+                if (
+                    hasattr(subset, "keep_tokens_separator")
+                    and subset.keep_tokens_separator
+                    and subset.keep_tokens_separator in caption
+                ):
+                    fixed_part, flex_part = caption.split(subset.keep_tokens_separator, 1)
+                    fixed_tokens = [t.strip() for t in fixed_part.split(subset.caption_separator) if t.strip()]
+                    flex_tokens = [t.strip() for t in flex_part.split(subset.caption_separator) if t.strip()]
+                else:
+                    tokens = [t.strip() for t in caption.strip().split(subset.caption_separator)]
+                    flex_tokens = tokens[:]
+                    if subset.keep_tokens > 0:
+                        fixed_tokens = flex_tokens[: subset.keep_tokens]
+                        flex_tokens = tokens[subset.keep_tokens :]
+
                 if subset.token_warmup_step < 1:  # 初回に上書きする
                     subset.token_warmup_step = math.floor(subset.token_warmup_step * self.max_train_steps)
                 if subset.token_warmup_step and self.current_step < subset.token_warmup_step:
                     tokens_len = (
-                        math.floor((self.current_step) * ((len(tokens) - subset.token_warmup_min) / (subset.token_warmup_step)))
+                        math.floor(
+                            (self.current_step) * ((len(flex_tokens) - subset.token_warmup_min) / (subset.token_warmup_step))
+                        )
                         + subset.token_warmup_min
                     )
-                    tokens = tokens[:tokens_len]
+                    flex_tokens = flex_tokens[:tokens_len]
 
                 def dropout_tags(tokens):
                     if subset.caption_tag_dropout_rate <= 0:
@@ -673,12 +699,6 @@ def dropout_tags(tokens):
                             l.append(token)
                     return l
 
-                fixed_tokens = []
-                flex_tokens = tokens[:]
-                if subset.keep_tokens > 0:
-                    fixed_tokens = flex_tokens[: subset.keep_tokens]
-                    flex_tokens = tokens[subset.keep_tokens :]
-
                 if subset.shuffle_caption:
                     random.shuffle(flex_tokens)
 
@@ -1722,7 +1742,9 @@ def __init__(
                 subset.caption_extension,
                 subset.num_repeats,
                 subset.shuffle_caption,
+                subset.caption_separator,
                 subset.keep_tokens,
+                subset.keep_tokens_separator,
                 subset.color_aug,
                 subset.flip_aug,
                 subset.face_crop_aug_range,
@@ -3136,6 +3158,13 @@ def add_dataset_arguments(
         default=0,
         help="keep heading N tokens when shuffling caption tokens (token means comma separated strings) / captionのシャッフル時に、先頭からこの個数のトークンをシャッフルしないで残す（トークンはカンマ区切りの各部分を意味する）",
     )
+    parser.add_argument(
+        "--keep_tokens_separator",
+        type=str,
+        default="",
+        help="A custom separator to divide the caption into fixed and flexible parts. Tokens before this separator will not be shuffled. If not specified, '--keep_tokens' will be used to determine the fixed number of tokens."
+        + " / captionを固定部分と可変部分に分けるためのカスタム区切り文字。この区切り文字より前のトークンはシャッフルされない。指定しない場合、'--keep_tokens'が固定部分のトークン数として使用される。",
+    )
     parser.add_argument(
         "--caption_prefix",
         type=str,
@@ -3837,7 +3866,6 @@ def prepare_accelerator(args: argparse.Namespace):
             if args.wandb_api_key is not None:
                 wandb.login(key=args.wandb_api_key)
 
-
     kwargs_handlers = (
         InitProcessGroupKwargs(timeout=datetime.timedelta(minutes=args.ddp_timeout)) if args.ddp_timeout else None,
         DistributedDataParallelKwargs(gradient_as_bucket_view=args.gradient_as_bucket_view, static_graph=args.static_graph)