Skip to content

Commit

Permalink
Merge branch 'main' into vsarge/seqlen_logging
Browse files Browse the repository at this point in the history
  • Loading branch information
erhoo82 authored Jan 11, 2024
2 parents fe2070e + 8d4218e commit f240597
Show file tree
Hide file tree
Showing 219 changed files with 29,128 additions and 1,072 deletions.
32 changes: 31 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ ARG REQUIRE_AIS_CLI=false

# Ensure apt-get won't prompt for selecting options
ENV DEBIAN_FRONTEND=noninteractive
# libavdevice-dev rerquired for latest torchaudio
# libavdevice-dev required for latest torchaudio
RUN apt-get update && \
apt-get upgrade -y && \
apt-get install -y \
Expand All @@ -42,6 +42,25 @@ RUN apt-get update && \
libavdevice-dev && \
rm -rf /var/lib/apt/lists/*

# libtool, ... , libgts-dev are required for graphviz
# graphviz is required for k2 and pynini visualization
RUN apt-get update && \
apt-get install -y \
libtool \
libltdl-dev \
automake \
autoconf \
bison \
flex \
tcl \
ghostscript \
libgd-dev \
fontconfig \
libcairo2-dev \
libpango1.0-dev \
libgts-dev && \
rm -rf /var/lib/apt/lists/*

WORKDIR /workspace/
# Install Megatron core; this can be removed once the 0.3 pip package is released.
# We leave it here in case we need to work off of a specific commit in main.
Expand Down Expand Up @@ -90,6 +109,17 @@ RUN pip install flash-attn
# install numba for latest containers
RUN pip install numba>=0.57.1

COPY scripts /tmp/nemo/scripts/
# Install the correct graphviz version (visualization tool for k2 and pynini); skip if installation fails
RUN INSTALL_MSG=$(/bin/bash /tmp/nemo/scripts/installers/install_graphviz.sh --docker); INSTALL_CODE=$?; \
echo ${INSTALL_MSG}; \
if [ ${INSTALL_CODE} -ne 0 ]; then \
echo "graphviz installation failed"; \
if [ "${REQUIRE_K2}" = true ]; then \
exit ${INSTALL_CODE}; \
else echo "Skipping failed graphviz installation"; fi \
else echo "graphviz installed successfully"; fi

# install k2, skip if installation fails
COPY scripts /tmp/nemo/scripts/
RUN INSTALL_MSG=$(/bin/bash /tmp/nemo/scripts/installers/install_k2.sh); INSTALL_CODE=$?; \
Expand Down
4 changes: 4 additions & 0 deletions examples/asr/transcribe_speech.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,9 @@ class TranscriptionConfig:
# Set to False to return text instead of hypotheses from the transcribe function, so as to save memory
return_hypotheses: bool = True

# key for groundtruth text in manifest
gt_text_attr_name: str = "text"


@hydra_runner(config_name="TranscriptionConfig", schema=TranscriptionConfig)
def main(cfg: TranscriptionConfig) -> Union[TranscriptionConfig, List[Hypothesis]]:
Expand Down Expand Up @@ -370,6 +373,7 @@ def autocast(dtype=None):
if cfg.calculate_wer:
output_manifest_w_wer, total_res, _ = cal_write_wer(
pred_manifest=output_filename,
gt_text_attr_name=cfg.gt_text_attr_name,
pred_text_attr_name=pred_text_attr_name,
clean_groundtruth_text=cfg.clean_groundtruth_text,
langid=cfg.langid,
Expand Down
2 changes: 1 addition & 1 deletion examples/multimodal/convert_ckpt_to_nemo.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
from nemo.collections.multimodal.models.text_to_image.imagen import MegatronImagen
from nemo.collections.multimodal.models.text_to_image.instruct_pix2pix.ldm.ddpm_edit import MegatronLatentDiffusionEdit
from nemo.collections.multimodal.models.text_to_image.stable_diffusion.ldm.ddpm import MegatronLatentDiffusion
from nemo.collections.multimodal.models.vision_language_foundation.clip import MegatronCLIPModel
from nemo.collections.multimodal.models.vision_language_foundation.clip.megatron_clip_models import MegatronCLIPModel
from nemo.collections.nlp.parts.megatron_trainer_builder import MegatronTrainerBuilder
from nemo.collections.nlp.parts.nlp_overrides import NLPSaveRestoreConnector
from nemo.utils import AppState, logging
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
name: stable-diffusion-train

infer:
unconditional_guidance_scale: 3
num_images_per_prompt: 4
hint_image_size: 512
height: 512
width: 512
down_factor: 8
inference_steps: 50
sampler_type: 'DDIM'
eta: 0
output_type: 'pil'
save_to_file: True
out_path: 'controlnet'
seed: 355
prompts:
- high quality picture of a house in oil painting style
control:
- /datasets/coco-stuff/house.png #images/val2017/000000001584.jpg
# If the input control is already the conditioning image, pass null here.
# If a reconstruction target is used as the control, specify the preprocessing function that converts it into a conditioning image.
control_image_preprocess:

trainer:
devices: 1
num_nodes: 1
accelerator: gpu
precision: 16
logger: False # logger provided by exp_manager

model:
restore_from_path: /ckpts/controlnet/30k.nemo
precision: ${trainer.precision}
strength: 2.0
guess_mode: False
222 changes: 222 additions & 0 deletions examples/multimodal/text_to_image/controlnet/conf/controlnet_v1-5.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,222 @@
trainer:
devices: 2
num_nodes: 1
accelerator: gpu
precision: 16
logger: False # logger provided by exp_manager
enable_checkpointing: False
use_distributed_sampler: True
max_epochs: 3 # PTL default. In practice, max_steps will be reached first.
max_steps: -1 # consumed_samples = global_step * micro_batch_size * data_parallel_size * accumulate_grad_batches
log_every_n_steps: 10
accumulate_grad_batches: 1 # do not modify, grad acc is automatic for training megatron models
gradient_clip_val: 1.0
benchmark: False
enable_model_summary: True
limit_val_batches: 0


exp_manager:
explicit_log_dir: null
exp_dir: null
name: controlnet
create_wandb_logger: False
wandb_logger_kwargs:
project: stable-diffusion
group: controlnet
name: controlnet-v1.5
resume: True
create_checkpoint_callback: True
create_tensorboard_logger: True
checkpoint_callback_params:
save_top_k: -1
every_n_train_steps: 5000
every_n_epochs: 0
monitor: reduced_train_loss
filename: 'controlnet--{reduced_train_loss:.2f}-{step}-{consumed_samples}'
resume_if_exists: True
resume_ignore_no_checkpoint: True
resume_from_checkpoint: ${model.resume_from_checkpoint}
ema:
enable: False
decay: 0.9999
validate_original_weights: False
every_n_steps: 1
cpu_offload: False




model:
precision: ${trainer.precision}
# specify micro_batch_size, global_batch_size, and model parallelism
# gradient accumulation will be done automatically based on data_parallel_size
micro_batch_size: 4 # limited by GPU memory
global_batch_size: 8

linear_start: 0.00085
linear_end: 0.0120
num_timesteps_cond: 1
log_every_t: 200
timesteps: 1000
first_stage_key: images
cond_stage_key: captions
control_key: hint
image_size: 64
channels: 4
cond_stage_trainable: false
conditioning_key: crossattn
monitor: val/loss_simple_ema
scale_factor: 0.18215
use_ema: False
scale_by_std: False
ckpt_path:
ignore_keys: [ ]
parameterization: eps
clip_denoised: True
load_only_unet: False
cosine_s: 8e-3
given_betas:
original_elbo_weight: 0
v_posterior: 0
l_simple_weight: 1
use_positional_encodings: False
learn_logvar: False
logvar_init: 0
beta_schedule: linear
loss_type: l2
learning_rate: 1.0e-04
concat_mode: True
cond_stage_forward:
text_embedding_dropout_rate: 0.0
fused_opt: True
inductor: False
inductor_cudagraphs: False
capture_cudagraph_iters: -1 # -1 to disable
channels_last: True
only_mid_control: False
sd_locked: True

control_stage_config:
_target_: nemo.collections.multimodal.models.controlnet.controlnet.ControlNet
params:
from_pretrained_unet: /ckpts/v1-5-pruned.ckpt
from_NeMo: True
image_size: 32 # unused
in_channels: 4
hint_channels: 3
model_channels: 320
attention_resolutions: [ 4, 2, 1 ]
num_res_blocks: 2
channel_mult: [ 1, 2, 4, 4 ]
num_heads: 8
use_spatial_transformer: True
use_linear_in_transformer: False
transformer_depth: 1
context_dim: 768
use_checkpoint: False
legacy: False
use_flash_attention: False

unet_config:
_target_: nemo.collections.multimodal.models.controlnet.controlnet.ControlledUnetModel
from_pretrained: /ckpts/v1-5-pruned.ckpt
from_NeMo: True
image_size: 32 # unused
in_channels: 4
out_channels: 4
model_channels: 320
attention_resolutions:
- 4
- 2
- 1
num_res_blocks: 2
channel_mult:
- 1
- 2
- 4
- 4
num_heads: 8
use_spatial_transformer: True
transformer_depth: 1
context_dim: 768
use_checkpoint: False
legacy: False
use_flash_attention: False

first_stage_config:
_target_: nemo.collections.multimodal.models.stable_diffusion.ldm.autoencoder.AutoencoderKL
from_pretrained: /ckpts/vae.bin
embed_dim: 4
monitor: val/rec_loss
ddconfig:
double_z: true
z_channels: 4
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult:
- 1
- 2
- 4
- 4
num_res_blocks: 2
attn_resolutions: []
dropout: 0.0
lossconfig:
target: torch.nn.Identity

cond_stage_config:
_target_: nemo.collections.multimodal.modules.stable_diffusion.encoders.modules.FrozenCLIPEmbedder
version: openai/clip-vit-large-patch14
device: cuda
max_length: 77

data:
num_workers: 16
synthetic_data: False # dataset_path and local_root_path can be empty when using synthetic data
synthetic_data_length: 10000
train:
dataset_path:
#- /datasets/tarfiles/fill50k.pkl
- /datasets/coco-stuff/coco-stuff-tarfiles/wdinfo-coco-stuff.pkl
augmentations:
resize_smallest_side: 512
center_crop_h_w: 512, 512
horizontal_flip: False
filterings:

webdataset:
infinite_sampler: False
local_root_path: /datasets/coco-stuff/coco-stuff-tarfiles

optim:
name: fused_adam
lr: 2e-5
weight_decay: 0.
betas:
- 0.9
- 0.999
sched:
name: WarmupHoldPolicy
warmup_steps: 0
hold_steps: 10000000000000 # Incredibly large value to hold the lr as constant

# Nsys profiling options
nsys_profile:
enabled: False
start_step: 10 # Global batch to start profiling
end_step: 10 # Global batch to end profiling
ranks: [ 0 ] # Global rank IDs to profile
gen_shape: False # Generate model and kernel details including input shapes

image_logger:
batch_frequency: 1000
max_images: 4

#miscellaneous
seed: 1234
resume_from_checkpoint: null # manually set the checkpoint file to load from
apex_transformer_log_level: 30 # Python logging level displays logs with severity greater than or equal to this
gradient_as_bucket_view: True # PyTorch DDP argument. Allocate gradients in a contiguous bucket to save memory (less fragmentation and buffer memory)
Loading

0 comments on commit f240597

Please sign in to comment.