-
Notifications
You must be signed in to change notification settings - Fork 2.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'main' into vsarge/seqlen_logging
- Loading branch information
Showing
219 changed files
with
29,128 additions
and
1,072 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
36 changes: 36 additions & 0 deletions
36
examples/multimodal/text_to_image/controlnet/conf/controlnet_infer.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
name: stable-diffusion-train | ||
|
||
infer: | ||
unconditional_guidance_scale: 3 | ||
num_images_per_prompt: 4 | ||
hint_image_size: 512 | ||
height: 512 | ||
width: 512 | ||
down_factor: 8 | ||
inference_steps: 50 | ||
sampler_type: 'DDIM' | ||
eta: 0 | ||
output_type: 'pil' | ||
save_to_file: True | ||
out_path: 'controlnet' | ||
seed: 355 | ||
prompts: | ||
- high quality picture of a house in oil painting style | ||
control: | ||
- /datasets/coco-stuff/house.png #images/val2017/000000001584.jpg | ||
# Depending on the input control, if the input control is already the conditioning image, null should be passed here | ||
# If a reconstruction target is used as control, then preprocessing function that turns it into a conditioning image needs to be specified | ||
control_image_preprocess: | ||
|
||
trainer: | ||
devices: 1 | ||
num_nodes: 1 | ||
accelerator: gpu | ||
precision: 16 | ||
logger: False # logger provided by exp_manager | ||
|
||
model: | ||
restore_from_path: /ckpts/controlnet/30k.nemo | ||
precision: ${trainer.precision} | ||
strength: 2.0 | ||
guess_mode: False |
222 changes: 222 additions & 0 deletions
222
examples/multimodal/text_to_image/controlnet/conf/controlnet_v1-5.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,222 @@ | ||
trainer: | ||
devices: 2 | ||
num_nodes: 1 | ||
accelerator: gpu | ||
precision: 16 | ||
logger: False # logger provided by exp_manager | ||
enable_checkpointing: False | ||
use_distributed_sampler: True | ||
max_epochs: 3 # PTL default. In practice, max_steps will be reached first. | ||
max_steps: -1 # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches | ||
log_every_n_steps: 10 | ||
accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models | ||
gradient_clip_val: 1.0 | ||
benchmark: False | ||
enable_model_summary: True | ||
limit_val_batches: 0 | ||
|
||
|
||
exp_manager: | ||
explicit_log_dir: null | ||
exp_dir: null | ||
name: controlnet | ||
create_wandb_logger: False | ||
wandb_logger_kwargs: | ||
project: stable-diffusion | ||
group: controlnet | ||
name: controlnet-v1.5 | ||
resume: True | ||
create_checkpoint_callback: True | ||
create_tensorboard_logger: True | ||
checkpoint_callback_params: | ||
save_top_k: -1 | ||
every_n_train_steps: 5000 | ||
every_n_epochs: 0 | ||
monitor: reduced_train_loss | ||
filename: 'controlnet--{reduced_train_loss:.2f}-{step}-{consumed_samples}' | ||
resume_if_exists: True | ||
resume_ignore_no_checkpoint: True | ||
resume_from_checkpoint: ${model.resume_from_checkpoint} | ||
ema: | ||
enable: False | ||
decay: 0.9999 | ||
validate_original_weights: False | ||
every_n_steps: 1 | ||
cpu_offload: False | ||
|
||
|
||
|
||
|
||
model: | ||
precision: ${trainer.precision} | ||
# specify micro_batch_size, global_batch_size, and model parallelism | ||
# gradient accumulation will be done automatically based on data_parallel_size | ||
micro_batch_size: 4 # limited by GPU memory | ||
global_batch_size: 8 | ||
|
||
linear_start: 0.00085 | ||
linear_end: 0.0120 | ||
num_timesteps_cond: 1 | ||
log_every_t: 200 | ||
timesteps: 1000 | ||
first_stage_key: images | ||
cond_stage_key: captions | ||
control_key: hint | ||
image_size: 64 | ||
channels: 4 | ||
cond_stage_trainable: false | ||
conditioning_key: crossattn | ||
monitor: val/loss_simple_ema | ||
scale_factor: 0.18215 | ||
use_ema: False | ||
scale_by_std: False | ||
ckpt_path: | ||
ignore_keys: [ ] | ||
parameterization: eps | ||
clip_denoised: True | ||
load_only_unet: False | ||
cosine_s: 8e-3 | ||
given_betas: | ||
original_elbo_weight: 0 | ||
v_posterior: 0 | ||
l_simple_weight: 1 | ||
use_positional_encodings: False | ||
learn_logvar: False | ||
logvar_init: 0 | ||
beta_schedule: linear | ||
loss_type: l2 | ||
learning_rate: 1.0e-04 | ||
concat_mode: True | ||
cond_stage_forward: | ||
text_embedding_dropout_rate: 0.0 | ||
fused_opt: True | ||
inductor: False | ||
inductor_cudagraphs: False | ||
capture_cudagraph_iters: -1 # -1 to disable | ||
channels_last: True | ||
only_mid_control: False | ||
sd_locked: True | ||
|
||
control_stage_config: | ||
_target_: nemo.collections.multimodal.models.controlnet.controlnet.ControlNet | ||
params: | ||
from_pretrained_unet: /ckpts/v1-5-pruned.ckpt | ||
from_NeMo: True | ||
image_size: 32 # unused | ||
in_channels: 4 | ||
hint_channels: 3 | ||
model_channels: 320 | ||
attention_resolutions: [ 4, 2, 1 ] | ||
num_res_blocks: 2 | ||
channel_mult: [ 1, 2, 4, 4 ] | ||
num_heads: 8 | ||
use_spatial_transformer: True | ||
use_linear_in_transformer: False | ||
transformer_depth: 1 | ||
context_dim: 768 | ||
use_checkpoint: False | ||
legacy: False | ||
use_flash_attention: False | ||
|
||
unet_config: | ||
_target_: nemo.collections.multimodal.models.controlnet.controlnet.ControlledUnetModel | ||
from_pretrained: /ckpts/v1-5-pruned.ckpt | ||
from_NeMo: True | ||
image_size: 32 # unused | ||
in_channels: 4 | ||
out_channels: 4 | ||
model_channels: 320 | ||
attention_resolutions: | ||
- 4 | ||
- 2 | ||
- 1 | ||
num_res_blocks: 2 | ||
channel_mult: | ||
- 1 | ||
- 2 | ||
- 4 | ||
- 4 | ||
num_heads: 8 | ||
use_spatial_transformer: True | ||
transformer_depth: 1 | ||
context_dim: 768 | ||
use_checkpoint: False | ||
legacy: False | ||
use_flash_attention: False | ||
|
||
first_stage_config: | ||
_target_: nemo.collections.multimodal.models.stable_diffusion.ldm.autoencoder.AutoencoderKL | ||
from_pretrained: /ckpts/vae.bin | ||
embed_dim: 4 | ||
monitor: val/rec_loss | ||
ddconfig: | ||
double_z: true | ||
z_channels: 4 | ||
resolution: 256 | ||
in_channels: 3 | ||
out_ch: 3 | ||
ch: 128 | ||
ch_mult: | ||
- 1 | ||
- 2 | ||
- 4 | ||
- 4 | ||
num_res_blocks: 2 | ||
attn_resolutions: [] | ||
dropout: 0.0 | ||
lossconfig: | ||
target: torch.nn.Identity | ||
|
||
cond_stage_config: | ||
_target_: nemo.collections.multimodal.modules.stable_diffusion.encoders.modules.FrozenCLIPEmbedder | ||
version: openai/clip-vit-large-patch14 | ||
device: cuda | ||
max_length: 77 | ||
|
||
data: | ||
num_workers: 16 | ||
synthetic_data: False # dataset_path and local_root_path can be empty when using synthetic data | ||
synthetic_data_length: 10000 | ||
train: | ||
dataset_path: | ||
#- /datasets/tarfiles/fill50k.pkl | ||
- /datasets/coco-stuff/coco-stuff-tarfiles/wdinfo-coco-stuff.pkl | ||
augmentations: | ||
resize_smallest_side: 512 | ||
center_crop_h_w: 512, 512 | ||
horizontal_flip: False | ||
filterings: | ||
|
||
webdataset: | ||
infinite_sampler: False | ||
local_root_path: /datasets/coco-stuff/coco-stuff-tarfiles | ||
|
||
optim: | ||
name: fused_adam | ||
lr: 2e-5 | ||
weight_decay: 0. | ||
betas: | ||
- 0.9 | ||
- 0.999 | ||
sched: | ||
name: WarmupHoldPolicy | ||
warmup_steps: 0 | ||
hold_steps: 10000000000000 # Incredibly large value to hold the lr as constant | ||
|
||
# Nsys profiling options | ||
nsys_profile: | ||
enabled: False | ||
start_step: 10 # Global batch to start profiling | ||
end_step: 10 # Global batch to end profiling | ||
ranks: [ 0 ] # Global rank IDs to profile | ||
gen_shape: False # Generate model and kernel details including input shapes | ||
|
||
image_logger: | ||
batch_frequency: 1000 | ||
max_images: 4 | ||
|
||
#miscellaneous | ||
seed: 1234 | ||
resume_from_checkpoint: null # manually set the checkpoint file to load from | ||
apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this | ||
gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory) |
Oops, something went wrong.