Commit d14fa0d

Updates

bmaltais committed Jul 19, 2023
1 parent 4541864 commit d14fa0d
Showing 7 changed files with 62 additions and 32 deletions.
11 changes: 6 additions & 5 deletions README.md
@@ -179,6 +179,8 @@ To set up the project, follow these steps:
.\setup.bat
```

During the accelerate config step, use the default values proposed by the configuration tool unless you know your hardware demands otherwise. The amount of VRAM on your GPU has no impact on the values used.
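For reference, a hedged sketch of what the resulting single-GPU configuration typically contains; the key names mirror accelerate's `default_config.yaml` and the values shown are assumptions that may differ between accelerate versions and machines, not output from this commit.

```python
# Hedged illustration only: typical single-GPU answers from `accelerate config`,
# expressed as the key/value pairs accelerate stores in default_config.yaml.
# Exact keys and defaults depend on your accelerate version and hardware.
default_accelerate_answers = {
    'compute_environment': 'LOCAL_MACHINE',
    'distributed_type': 'NO',        # no multi-GPU / DeepSpeed setup
    'mixed_precision': 'fp16',       # bf16 is also an option on 30X0/40X0 cards
    'num_machines': 1,
    'num_processes': 1,
    'use_cpu': False,
}

for key, value in default_accelerate_answers.items():
    print(f'{key}: {value}')
```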

#### Optional: CUDNN 8.6

The following steps are optional but can improve the learning speed for owners of NVIDIA 30X0/40X0 GPUs. These steps enable larger training batch sizes and faster training speeds.
@@ -474,8 +476,7 @@ If you come across a `FileNotFoundError`, it is likely due to an installation is

## Change History

* 2023/07/18 (v21.8.3)
- Update to latest sd-scripts sdxl code base
- Fix typo: https://github.com/bmaltais/kohya_ss/issues/1205
- Add min and max resolution parameter for buckets
- Add colab notebook from https://github.com/camenduru/kohya_ss-colab
* 2023/07/18 (v21.8.4)
- Relocate LR number of cycles and LR power options
- Add missing LR number of cycles and LR power to Dreambooth and TI scripts
- Fix issue with conv_block_dims and conv_block_alphas
14 changes: 14 additions & 0 deletions dreambooth_gui.py
@@ -98,6 +98,8 @@ def save_configuration(
gradient_accumulation_steps,
model_list,
keep_tokens,
lr_scheduler_num_cycles,
lr_scheduler_power,
persistent_data_loader_workers,
bucket_no_upscale,
random_crop,
@@ -209,6 +211,8 @@ def open_configuration(
gradient_accumulation_steps,
model_list,
keep_tokens,
lr_scheduler_num_cycles,
lr_scheduler_power,
persistent_data_loader_workers,
bucket_no_upscale,
random_crop,
@@ -319,6 +323,8 @@ def train_model(
gradient_accumulation_steps,
model_list, # Keep this. Yes, it is unused here but required given the common list used
keep_tokens,
lr_scheduler_num_cycles,
lr_scheduler_power,
persistent_data_loader_workers,
bucket_no_upscale,
random_crop,
@@ -545,6 +551,12 @@ def train_model(
run_cmd += f' --vae="{vae}"'
if not output_name == '':
run_cmd += f' --output_name="{output_name}"'
if not lr_scheduler_num_cycles == '':
run_cmd += f' --lr_scheduler_num_cycles="{lr_scheduler_num_cycles}"'
else:
run_cmd += f' --lr_scheduler_num_cycles="{epoch}"'
if not lr_scheduler_power == '':
run_cmd += f' --lr_scheduler_power="{lr_scheduler_power}"'
if int(max_token_length) > 75:
run_cmd += f' --max_token_length={max_token_length}'
if not max_train_epochs == '':
@@ -770,6 +782,8 @@ def dreambooth_tab(
advanced_training.gradient_accumulation_steps,
source_model.model_list,
advanced_training.keep_tokens,
basic_training.lr_scheduler_num_cycles,
basic_training.lr_scheduler_power,
advanced_training.persistent_data_loader_workers,
advanced_training.bucket_no_upscale,
advanced_training.random_crop,
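A minimal, self-contained sketch of the flag-assembly logic this file's hunks add to train_model (the helper name is hypothetical; the real code builds run_cmd inline): an empty "LR number of cycles" field falls back to the epoch count, and LR power is only forwarded when set.

```python
def append_lr_scheduler_args(run_cmd: str,
                             lr_scheduler_num_cycles: str,
                             lr_scheduler_power: str,
                             epoch: int) -> str:
    # Mirrors the additions above: a blank cycles field falls back to the epoch count.
    if lr_scheduler_num_cycles != '':
        run_cmd += f' --lr_scheduler_num_cycles="{lr_scheduler_num_cycles}"'
    else:
        run_cmd += f' --lr_scheduler_num_cycles="{epoch}"'
    # LR power is optional and only appended when the field is filled in.
    if lr_scheduler_power != '':
        run_cmd += f' --lr_scheduler_power="{lr_scheduler_power}"'
    return run_cmd


# Example: empty cycles field and 10 training epochs.
print(append_lr_scheduler_args('accelerate launch train_db.py', '', '0.5', 10))
```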
10 changes: 0 additions & 10 deletions library/class_advanced_training.py
@@ -41,16 +41,6 @@ def noise_offset_type_change(noise_offset_type):
outputs=self.vae,
show_progress=False,
)
with gr.Row(visible=not finetuning):
self.lr_scheduler_num_cycles = gr.Textbox(
label='LR number of cycles',
placeholder='(Optional) For Cosine with restart and polynomial only',
)

self.lr_scheduler_power = gr.Textbox(
label='LR power',
placeholder='(Optional) For Cosine with restart and polynomial only',
)

with gr.Row():
self.additional_parameters = gr.Textbox(
10 changes: 10 additions & 0 deletions library/class_basic_training.py
@@ -110,6 +110,16 @@ def __init__(
value='AdamW8bit',
interactive=True,
)
with gr.Row(visible=not finetuning):
self.lr_scheduler_num_cycles = gr.Textbox(
label='LR number of cycles',
placeholder='(Optional) For Cosine with restart and polynomial only',
)

self.lr_scheduler_power = gr.Textbox(
label='LR power',
placeholder='(Optional) For Cosine with restart and polynomial only',
)
with gr.Row():
self.optimizer_args = gr.Textbox(
label='Optimizer extra arguments',
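A small standalone sketch (not the class itself) of the visibility pattern used for the relocated fields: both textboxes live in a gr.Row whose visible flag is driven by the finetuning argument, so the whole row is hidden on the Finetune tab.

```python
import gradio as gr  # assumes gradio is installed


def lr_scheduler_fields(finetuning: bool = False):
    # Mirror of the relocated block: the entire row disappears when finetuning is True.
    with gr.Row(visible=not finetuning):
        num_cycles = gr.Textbox(
            label='LR number of cycles',
            placeholder='(Optional) For Cosine with restart and polynomial only',
        )
        power = gr.Textbox(
            label='LR power',
            placeholder='(Optional) For Cosine with restart and polynomial only',
        )
    return num_cycles, power


with gr.Blocks() as demo:
    lr_scheduler_fields(finetuning=False)
```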
1 change: 1 addition & 0 deletions library/class_source_model.py
@@ -80,6 +80,7 @@ def __init__(
outputs=self.pretrained_model_name_or_path,
show_progress=False,
)
with gr.Row():
self.v2 = gr.Checkbox(label='v2', value=False, visible=False)
self.v_parameterization = gr.Checkbox(
label='v_parameterization', value=False, visible=False
34 changes: 17 additions & 17 deletions lora_gui.py
@@ -141,8 +141,8 @@ def save_configuration(
block_lr_zero_threshold,
block_dims,
block_alphas,
conv_dims,
conv_alphas,
conv_block_dims,
conv_block_alphas,
weighted_captions,
unit,
save_every_n_steps,
@@ -286,8 +286,8 @@ def open_configuration(
block_lr_zero_threshold,
block_dims,
block_alphas,
conv_dims,
conv_alphas,
conv_block_dims,
conv_block_alphas,
weighted_captions,
unit,
save_every_n_steps,
@@ -457,8 +457,8 @@ def train_model(
block_lr_zero_threshold,
block_dims,
block_alphas,
conv_dims,
conv_alphas,
conv_block_dims,
conv_block_alphas,
weighted_captions,
unit,
save_every_n_steps,
@@ -776,8 +776,8 @@ def train_model(
'block_lr_zero_threshold',
'block_dims',
'block_alphas',
'conv_dims',
'conv_alphas',
'conv_block_dims',
'conv_block_alphas',
'rank_dropout',
'module_dropout',
]
@@ -810,8 +810,8 @@ def train_model(
'block_lr_zero_threshold',
'block_dims',
'block_alphas',
'conv_dims',
'conv_alphas',
'conv_block_dims',
'conv_block_alphas',
'rank_dropout',
'module_dropout',
'unit',
@@ -1357,12 +1357,12 @@ def update_LoRA_settings(LoRA_type):
)
with gr.Tab(label='Conv'):
with gr.Row(visible=True):
conv_dims = gr.Textbox(
conv_block_dims = gr.Textbox(
label='Conv dims',
placeholder='(Optional) eg: 2,2,2,2,4,4,4,4,6,6,6,6,8,6,6,6,6,4,4,4,4,2,2,2,2',
info='Expand LoRA to Conv2d 3x3 and specify the dim (rank) of each block. Specify 25 numbers.',
info='Extend LoRA to Conv2d 3x3 and specify the dim (rank) of each block. Specify 25 numbers.',
)
conv_alphas = gr.Textbox(
conv_block_alphas = gr.Textbox(
label='Conv alphas',
placeholder='(Optional) eg: 2,2,2,2,4,4,4,4,6,6,6,6,8,6,6,6,6,4,4,4,4,2,2,2,2',
info='Specify the alpha of each block when expanding LoRA to Conv2d 3x3. Specify 25 numbers. If omitted, the value of conv_alpha is used.',
@@ -1470,8 +1470,8 @@ def update_LoRA_settings(LoRA_type):
network_alpha,
folders.training_comment,
advanced_training.keep_tokens,
advanced_training.lr_scheduler_num_cycles,
advanced_training.lr_scheduler_power,
basic_training.lr_scheduler_num_cycles,
basic_training.lr_scheduler_power,
advanced_training.persistent_data_loader_workers,
advanced_training.bucket_no_upscale,
advanced_training.random_crop,
@@ -1505,8 +1505,8 @@ def update_LoRA_settings(LoRA_type):
block_lr_zero_threshold,
block_dims,
block_alphas,
conv_dims,
conv_alphas,
conv_block_dims,
conv_block_alphas,
advanced_training.weighted_captions,
unit,
advanced_training.save_every_n_steps,
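For illustration, a hedged sketch of how the renamed conv_block_dims/conv_block_alphas fields (25 comma-separated values, one per block) might be folded into the network arguments handed to sd-scripts; the helper and the quoting of `--network_args` are assumptions, not code taken from this commit.

```python
# Hedged illustration only: joining the renamed Conv tab fields into network args.
# The key names come from the kwargs list in this diff; the exact --network_args
# string kohya_ss builds may differ.
def build_conv_network_args(conv_block_dims: str, conv_block_alphas: str) -> str:
    args = ''
    for key, value in (('conv_block_dims', conv_block_dims),
                       ('conv_block_alphas', conv_block_alphas)):
        if value:
            args += f' {key}="{value}"'
    return args


dims = '2,2,2,2,4,4,4,4,6,6,6,6,8,6,6,6,6,4,4,4,4,2,2,2,2'   # 25 per-block ranks
alphas = dims                                                  # alphas often mirror dims
print('--network_args' + build_conv_network_args(dims, alphas))
```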
14 changes: 14 additions & 0 deletions textual_inversion_gui.py
@@ -104,6 +104,8 @@ def save_configuration(
weights,
template,
keep_tokens,
lr_scheduler_num_cycles,
lr_scheduler_power,
persistent_data_loader_workers,
bucket_no_upscale,
random_crop,
@@ -223,6 +225,8 @@ def open_configuration(
weights,
template,
keep_tokens,
lr_scheduler_num_cycles,
lr_scheduler_power,
persistent_data_loader_workers,
bucket_no_upscale,
random_crop,
@@ -339,6 +343,8 @@ def train_model(
weights,
template,
keep_tokens,
lr_scheduler_num_cycles,
lr_scheduler_power,
persistent_data_loader_workers,
bucket_no_upscale,
random_crop,
@@ -561,6 +567,12 @@ def train_model(
run_cmd += f' --vae="{vae}"'
if not output_name == '':
run_cmd += f' --output_name="{output_name}"'
if not lr_scheduler_num_cycles == '':
run_cmd += f' --lr_scheduler_num_cycles="{lr_scheduler_num_cycles}"'
else:
run_cmd += f' --lr_scheduler_num_cycles="{epoch}"'
if not lr_scheduler_power == '':
run_cmd += f' --lr_scheduler_power="{lr_scheduler_power}"'
if int(max_token_length) > 75:
run_cmd += f' --max_token_length={max_token_length}'
if not max_train_epochs == '':
@@ -853,6 +865,8 @@ def ti_tab(
weights,
template,
advanced_training.keep_tokens,
basic_training.lr_scheduler_num_cycles,
basic_training.lr_scheduler_power,
advanced_training.persistent_data_loader_workers,
advanced_training.bucket_no_upscale,
advanced_training.random_crop,
