Skip to content

Commit 6fb7369

Browse files
committed
initial
1 parent 562ddce commit 6fb7369

File tree

3 files changed

+50
-25
lines changed

3 files changed

+50
-25
lines changed

onnxruntime/python/tools/transformers/models/stable_diffusion/README.md

+12
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,14 @@ pip install optimum diffusers onnx onnxruntime-gpu
212212
optimum-cli export onnx --model stabilityai/stable-diffusion-xl-base-1.0 --task stable-diffusion-xl ./sd_xl_base_onnx
213213
```
214214

215+
SD3 and Flux require transformers >= 4.45 and optimum > 1.23.3:
216+
```
217+
git clone https://github.com/huggingface/optimum
218+
cd optimum
pip install -e .
219+
optimum-cli export onnx --model stabilityai/stable-diffusion-3-medium-diffusers sd3_onnx_fp32
220+
optimum-cli export onnx --model stabilityai/stable-diffusion-3.5-medium sd3.5_onnx_fp32
221+
```
222+
215223
### Optimize ONNX Pipeline
216224

217225
Example to optimize the exported float32 ONNX models, and save to float16 models:
@@ -230,6 +238,10 @@ For SDXL model, it is recommended to use a machine with 48 GB or more memory to
230238
python optimize_pipeline.py -i ./sd_xl_base_onnx -o ./sd_xl_base_fp16 --float16
231239
```
232240

241+
For SD3 model:
242+
```
243+
python optimize_pipeline.py -i sd3_onnx_fp32 -o sd3_onnx_fp16 --float16
244+
```
233245
### Run Benchmark
234246

235247
The benchmark.py script will run a warm-up prompt twice, and measure the peak GPU memory usage in these two runs, then record them as first_run_memory_MB and second_run_memory_MB. Then it will run 5 runs to get average latency (in seconds), and output the results to benchmark_result.csv.

onnxruntime/python/tools/transformers/models/stable_diffusion/benchmark.py

+7-14
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,11 @@
2222
"2.0": "stabilityai/stable-diffusion-2",
2323
"2.1": "stabilityai/stable-diffusion-2-1",
2424
"xl-1.0": "stabilityai/stable-diffusion-xl-refiner-1.0",
25+
"3.0": "stabilityai/stable-diffusion-3-medium-diffusers",
26+
# "3.5": "stabilityai/stable-diffusion-3.5-medium",
27+
# "3.5-large": "stabilityai/stable-diffusion-3.5-large",
28+
# "flux.1-schnell": "black-forest-labs/FLUX.1-schnell",
29+
# "flux.1-dev": "black-forest-labs/FLUX.1-dev",
2530
}
2631

2732
PROVIDERS = {
@@ -322,22 +327,10 @@ def get_optimum_ort_pipeline(
322327
disable_safety_checker: bool = True,
323328
use_io_binding: bool = False,
324329
):
325-
from optimum.onnxruntime import ORTStableDiffusionPipeline, ORTStableDiffusionXLPipeline
330+
from optimum.onnxruntime import ORTPipelineForText2Image, ORTStableDiffusionPipeline, ORTStableDiffusionXLPipeline
326331

327332
if directory is not None and os.path.exists(directory):
328-
if "xl" in model_name:
329-
pipeline = ORTStableDiffusionXLPipeline.from_pretrained(
330-
directory,
331-
provider=provider,
332-
session_options=None,
333-
use_io_binding=False, # Not supported by Optimum version 1.17.1 at the time of verification.
334-
)
335-
else:
336-
pipeline = ORTStableDiffusionPipeline.from_pretrained(
337-
directory,
338-
provider=provider,
339-
use_io_binding=use_io_binding,
340-
)
333+
pipeline = ORTPipelineForText2Image.from_pretrained(directory, provider=provider, use_io_binding=use_io_binding)
341334
elif "xl" in model_name:
342335
pipeline = ORTStableDiffusionXLPipeline.from_pretrained(
343336
model_name,

onnxruntime/python/tools/transformers/models/stable_diffusion/optimize_pipeline.py

+31-11
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import coloredlogs
2828
import onnx
2929
from fusion_options import FusionOptions
30+
from onnx_model_bert import BertOnnxModel
3031
from onnx_model_clip import ClipOnnxModel
3132
from onnx_model_unet import UnetOnnxModel
3233
from onnx_model_vae import VaeOnnxModel
@@ -46,9 +47,20 @@ def has_external_data(onnx_model_path):
4647
return False
4748

4849

50+
def _get_model_list(source_dir: Path):
51+
is_xl = (source_dir / "text_encoder_2").exists()
52+
is_sd3 = (source_dir / "text_encoder_3").exists()
53+
model_list_sd3 = ["text_encoder", "text_encoder_2", "text_encoder_3", "transformer", "vae_encoder", "vae_decoder"]
54+
model_list_sdxl = ["text_encoder", "text_encoder_2", "unet", "vae_encoder", "vae_decoder"]
55+
model_list_sd = ["text_encoder", "unet", "vae_encoder", "vae_decoder"]
56+
model_list = model_list_sd3 if is_sd3 else (model_list_sdxl if is_xl else model_list_sd)
57+
return model_list
58+
59+
4960
def _optimize_sd_pipeline(
5061
source_dir: Path,
5162
target_dir: Path,
63+
model_list: List[str],
5264
use_external_data_format: Optional[bool],
5365
float16: bool,
5466
force_fp32_ops: List[str],
@@ -60,6 +72,7 @@ def _optimize_sd_pipeline(
6072
Args:
6173
source_dir (Path): Root of input directory of stable diffusion onnx pipeline with float32 models.
6274
target_dir (Path): Root of output directory of stable diffusion onnx pipeline with optimized models.
75+
model_list (List[str]): list of directory names with onnx model.
6376
use_external_data_format (Optional[bool]): use external data format.
6477
float16 (bool): use half precision
6578
force_fp32_ops(List[str]): operators that are forced to run in float32.
@@ -70,18 +83,21 @@ def _optimize_sd_pipeline(
7083
RuntimeError: output onnx model path existed
7184
"""
7285
model_type_mapping = {
86+
"transformer": "mmdit",
7387
"unet": "unet",
7488
"vae_encoder": "vae",
7589
"vae_decoder": "vae",
7690
"text_encoder": "clip",
7791
"text_encoder_2": "clip",
7892
"safety_checker": "unet",
93+
"text_encoder_3": "clip",
7994
}
8095

8196
model_type_class_mapping = {
8297
"unet": UnetOnnxModel,
8398
"vae": VaeOnnxModel,
8499
"clip": ClipOnnxModel,
100+
"mmdit": BertOnnxModel, # TODO: have a new class for DiT
85101
}
86102

87103
force_fp32_operators = {
@@ -91,10 +107,10 @@ def _optimize_sd_pipeline(
91107
"text_encoder": [],
92108
"text_encoder_2": [],
93109
"safety_checker": [],
110+
"text_encoder_3": [],
111+
"transformer": [],
94112
}
95113

96-
is_xl = (source_dir / "text_encoder_2").exists()
97-
98114
if force_fp32_ops:
99115
for fp32_operator in force_fp32_ops:
100116
parts = fp32_operator.split(":")
@@ -108,8 +124,8 @@ def _optimize_sd_pipeline(
108124
for name, model_type in model_type_mapping.items():
109125
onnx_model_path = source_dir / name / "model.onnx"
110126
if not os.path.exists(onnx_model_path):
111-
if name != "safety_checker":
112-
logger.info("input onnx model does not exist: %s", onnx_model_path)
127+
if name != "safety_checker" and name in model_list:
128+
logger.warning("input onnx model does not exist: %s", onnx_model_path)
113129
# some model are optional so we do not raise error here.
114130
continue
115131

@@ -122,7 +138,7 @@ def _optimize_sd_pipeline(
122138
use_external_data_format = has_external_data(onnx_model_path)
123139

124140
# Graph fusion before fp16 conversion, otherwise they cannot be fused later.
125-
logger.info(f"Optimize {onnx_model_path}...")
141+
logger.info("Optimize %s ...", onnx_model_path)
126142

127143
args.model_type = model_type
128144
fusion_options = FusionOptions.parse(args)
@@ -147,6 +163,7 @@ def _optimize_sd_pipeline(
147163

148164
if float16:
149165
# For SD-XL, use FP16 in VAE decoder will cause NaN and black image so we keep it in FP32.
166+
is_xl = (source_dir / "text_encoder_2").exists()
150167
if is_xl and name == "vae_decoder":
151168
logger.info("Skip converting %s to float16 to avoid NaN", name)
152169
else:
@@ -181,17 +198,18 @@ def _optimize_sd_pipeline(
181198
logger.info("*" * 20)
182199

183200

184-
def _copy_extra_directory(source_dir: Path, target_dir: Path):
201+
def _copy_extra_directory(source_dir: Path, target_dir: Path, model_list: List[str]):
185202
"""Copy extra directory that does not have onnx model
186203
187204
Args:
188205
source_dir (Path): source directory
189206
target_dir (Path): target directory
207+
model_list (List[str]): list of directory names with onnx model.
190208
191209
Raises:
192210
RuntimeError: source path does not exist
193211
"""
194-
extra_dirs = ["scheduler", "tokenizer", "tokenizer_2", "feature_extractor"]
212+
extra_dirs = ["scheduler", "tokenizer", "tokenizer_2", "tokenizer_3", "feature_extractor"]
195213

196214
for name in extra_dirs:
197215
source_path = source_dir / name
@@ -213,8 +231,7 @@ def _copy_extra_directory(source_dir: Path, target_dir: Path):
213231
logger.info("%s => %s", source_path, target_path)
214232

215233
# Some directory are optional
216-
onnx_model_dirs = ["text_encoder", "text_encoder_2", "unet", "vae_encoder", "vae_decoder", "safety_checker"]
217-
for onnx_model_dir in onnx_model_dirs:
234+
for onnx_model_dir in model_list:
218235
source_path = source_dir / onnx_model_dir / "config.json"
219236
target_path = target_dir / onnx_model_dir / "config.json"
220237
if source_path.exists():
@@ -236,17 +253,20 @@ def optimize_stable_diffusion_pipeline(
236253
if overwrite:
237254
shutil.rmtree(output_dir, ignore_errors=True)
238255
else:
239-
raise RuntimeError("output directory existed:{output_dir}. Add --overwrite to empty the directory.")
256+
raise RuntimeError(f"output directory existed:{output_dir}. Add --overwrite to empty the directory.")
240257

241258
source_dir = Path(input_dir)
242259
target_dir = Path(output_dir)
243260
target_dir.mkdir(parents=True, exist_ok=True)
244261

245-
_copy_extra_directory(source_dir, target_dir)
262+
model_list = _get_model_list(source_dir)
263+
264+
_copy_extra_directory(source_dir, target_dir, model_list)
246265

247266
_optimize_sd_pipeline(
248267
source_dir,
249268
target_dir,
269+
model_list,
250270
use_external_data_format,
251271
float16,
252272
args.force_fp32_ops,

0 commit comments

Comments
 (0)