Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion examples/stable-diffusion/plot_loss_curve.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
import numpy as np
from scipy.signal import savgol_filter

SAMPLE=50
def sample(x):
    """Return the subsampling stride for a series of ``x`` points.

    The stride is one per 500 points (floor division), so long series are
    thinned to roughly 500 plotted points. The result is clamped to a minimum
    of 1: the value is used as a slice step (``seq[::stride]``), and a step of
    0 -- which plain ``x // 500`` yields for any series shorter than 500
    points -- raises ``ValueError: slice step cannot be zero``.

    Args:
        x: Number of points in the series (e.g. ``len(loss)``).

    Returns:
        A positive integer stride, always >= 1.
    """
    return max(1, x // 500)

def test():
def match(x,y):
Expand Down Expand Up @@ -77,18 +78,22 @@ def parse(flnm, smooth_fn=lambda x:x, clip_first=100):
if not filter_fn(ln):
continue
step, step_loss = strip_ln(ln)

if 'step_loss' in ln:
if prev_step != int(step):
loss.append(last_loss)
steps.append(int(prev_step))
prev_step = int(step)
#print("\nstep/loss", step, last_loss)
else:
last_loss = float(step_loss)
#TODO: parse eval epoch?
loss.append(last_loss)
steps.append(int(prev_step))
SAMPLE= sample(len(loss))
loss = (loss[clip_first:])[::SAMPLE]
steps = (steps[clip_first:])[::SAMPLE]

loss = smooth_fn(loss)
#epoch=fix(epoch) #TODO uncomment this
return steps, loss, eval_epoch, eval_samples_per_sec, flnm.split('/')[-1].split('.')[0]
Expand Down
16 changes: 4 additions & 12 deletions examples/stable-diffusion/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,11 @@ python train_text_to_image_sdxl.py \
--center_crop \
--random_flip \
--proportion_empty_prompts=0.2 \
--train_batch_size 1 \
--gradient_accumulation_steps 4 \
--gradient_checkpointing \
--max_train_steps 1 \
--train_batch_size 16\
--max_train_steps 10000 \
--learning_rate 1e-06 \
--lr_scheduler constant \
--lr_warmup_steps 0 \
--gaudi_config Habana/stable-diffusion \
--bf16 \
--validation_prompt="a cute Sundar Pichai creature" \
--validation_epochs 5 \
--checkpointing_steps=5000 \
--output_dir sdxl-pokemon-model \
--gaudi_config_name Habana/stable-diffusion \
--throughput_warmup_steps 3 \
--use_hpu_graphs \
--cache_dir /root/software/data/pytorch/huggingface/sdxl 2>&1 | tee log.txt
--cache_dir /root/software/data/pytorch/huggingface/sdxl
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,8 @@ python train_text_to_image_sdxl.py \
--center_crop \
--random_flip \
--proportion_empty_prompts=0.2 \
--train_batch_size 1 \
--gradient_accumulation_steps 4 \
--max_train_steps 1 \
--train_batch_size 16 \
--max_train_steps 2500 \
--learning_rate 1e-05 \
--max_grad_norm 1 \
--lr_scheduler constant \
Expand All @@ -17,7 +16,8 @@ python train_text_to_image_sdxl.py \
--gaudi_config_name Habana/stable-diffusion \
--throughput_warmup_steps 3 \
--bf16 \
--validation_prompt="a cute Sundar Pichai creature" \
--validation_epochs 5 \
--validation_prompt="a horse running on the beach during sunset" \
--validation_epochs 48 \
--use_hpu_graphs \
--cache_dir /root/software/data/pytorch/huggingface/sdxl 2>&1 | tee log.txt
--checkpointing_steps 2500 \
--cache_dir /root/software/data/pytorch/huggingface/sdxl 2>&1 | tee log_1x_bs16.txt
18 changes: 8 additions & 10 deletions examples/stable-diffusion/train_text_to_image_sdxl.py
Original file line number Diff line number Diff line change
Expand Up @@ -1025,17 +1025,17 @@ def unwrap_model(model):

with accelerator.accumulate(unet):
# Sample noise that we'll add to the latents
model_input = batch["model_input"].to(dtype=weight_dtype).to(accelerator.device)

model_input = batch["model_input"].to(dtype=weight_dtype)

noise = torch.randn_like(model_input)
if args.noise_offset:
# https://www.crosslabs.org//blog/diffusion-with-offset-noise
# torch.randn is broken on HPU so we need workaround using CPU here
#rand_device = "cpu" if model_input.device.type == "hpu" else model_input.device
rand_device = model_input.device
noise += args.noise_offset * torch.randn(
(model_input.shape[0], model_input.shape[1], 1, 1), device=rand_device
)
noise = noise.to(model_input.device)
noise = noise.to(model_input.device)

bsz = model_input.shape[0]

Expand All @@ -1056,7 +1056,6 @@ def unwrap_model(model):
# Add noise to the model input according to the noise magnitude at each timestep
# (this is the forward diffusion process)
noisy_model_input = noise_scheduler.add_noise(model_input, noise, timesteps)

# time ids
def compute_time_ids(original_size, crops_coords_top_left):
# Adapted from pipeline.StableDiffusionXLPipeline._get_add_time_ids
Expand All @@ -1069,7 +1068,6 @@ def compute_time_ids(original_size, crops_coords_top_left):
add_time_ids = torch.cat(
[compute_time_ids(s, c) for s, c in zip(batch["original_sizes"], batch["crop_top_lefts"])]
)

# Predict the noise residual
unet_added_conditions = {"time_ids": add_time_ids}
prompt_embeds = batch["prompt_embeds"].to(accelerator.device)
Expand Down Expand Up @@ -1121,14 +1119,14 @@ def compute_time_ids(original_size, crops_coords_top_left):

# Gather the losses across all processes for logging (if we use distributed training).
avg_loss = accelerator.gather(loss.repeat(args.train_batch_size)).mean()
train_loss += avg_loss.item() / args.gradient_accumulation_steps
train_loss += avg_loss / args.gradient_accumulation_steps

# Backpropagate
# TODO: check why this causes a buffer-overflow issue
#with accelerator.autocast():
#with torch.autocast(device_type="hpu", dtype=weight_dtype, enabled=True):
accelerator.backward(loss)
htcore.mark_step()

if accelerator.sync_gradients:
params_to_clip = unet.parameters()
accelerator.clip_grad_norm_(params_to_clip, args.max_grad_norm)
Expand Down Expand Up @@ -1177,7 +1175,7 @@ def compute_time_ids(original_size, crops_coords_top_left):
break

if accelerator.is_main_process:
if args.validation_prompt is not None and epoch % args.validation_epochs == 0:
if args.validation_prompt is not None and (epoch+1) % args.validation_epochs == 0:
logger.info(
f"Running validation... \n Generating {args.num_validation_images} images with prompt:"
f" {args.validation_prompt}."
Expand Down