Skip to content

Commit 86598a6

Browse files
sd3 pipeline support (#916)
* WIP: conversion and pipeline base * Support SD3 * img2img pipeline * fix model export * update after migration on new pipeline style * fix inference issues * fix missed tokenizer export * add support in quantization * Update optimum/intel/openvino/modeling_diffusion.py Co-authored-by: Ilyas Moutawwakil <57442720+IlyasMoutawwakil@users.noreply.github.com> * add tests * fix tests * update tests * Update tests/openvino/utils_tests.py Co-authored-by: Ilyas Moutawwakil <57442720+IlyasMoutawwakil@users.noreply.github.com> * fix tests * add export tests * fix cli tests * use fp32 timesteps * add flux * fix after black update * apply review comments * compatibility with diffusers 0.31.0 * apply review comments * Update tests/openvino/test_diffusion.py Co-authored-by: Ilyas Moutawwakil <57442720+IlyasMoutawwakil@users.noreply.github.com> * Update tests/openvino/test_diffusion.py Co-authored-by: Ilyas Moutawwakil <57442720+IlyasMoutawwakil@users.noreply.github.com> --------- Co-authored-by: Ilyas Moutawwakil <57442720+IlyasMoutawwakil@users.noreply.github.com>
1 parent a432102 commit 86598a6

17 files changed

+1001
-110
lines changed

optimum/commands/export/openvino.py

+4
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,10 @@ def run(self):
318318
from optimum.intel import OVStableDiffusionPipeline
319319

320320
model_cls = OVStableDiffusionPipeline
321+
elif class_name == "StableDiffusion3Pipeline":
322+
from optimum.intel import OVStableDiffusion3Pipeline
323+
324+
model_cls = OVStableDiffusion3Pipeline
321325
else:
322326
raise NotImplementedError(f"Quantization in hybrid mode isn't supported for class {class_name}.")
323327

optimum/exporters/openvino/__main__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -493,7 +493,7 @@ def maybe_convert_tokenizers(library_name: str, output: Path, model=None, prepro
493493
f"models won't be generated. Exception: {exception}"
494494
)
495495
elif model:
496-
for tokenizer_name in ("tokenizer", "tokenizer_2"):
496+
for tokenizer_name in ("tokenizer", "tokenizer_2", "tokenizer_3"):
497497
tokenizer = getattr(model, tokenizer_name, None)
498498
if tokenizer:
499499
export_tokenizer(tokenizer, output / tokenizer_name, task=task)

optimum/exporters/openvino/convert.py

+247-18
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
import copy
1516
import functools
1617
import gc
1718
import logging
@@ -31,7 +32,12 @@
3132
from optimum.exporters.onnx.convert import check_dummy_inputs_are_allowed
3233
from optimum.exporters.onnx.convert import export_pytorch as export_pytorch_to_onnx
3334
from optimum.exporters.onnx.convert import export_tensorflow as export_tensorflow_onnx
34-
from optimum.exporters.utils import _get_submodels_and_export_configs as _default_get_submodels_and_export_configs
35+
from optimum.exporters.utils import (
36+
_get_submodels_and_export_configs as _default_get_submodels_and_export_configs,
37+
)
38+
from optimum.exporters.utils import (
39+
get_diffusion_models_for_export,
40+
)
3541
from optimum.intel.utils.import_utils import (
3642
_nncf_version,
3743
_open_clip_version,
@@ -619,23 +625,27 @@ def export_from_model(
619625
model, library_name, task, preprocessors, custom_export_configs, fn_get_submodels
620626
)
621627

622-
logging.disable(logging.INFO)
623-
export_config, models_and_export_configs, stateful_submodels = _get_submodels_and_export_configs(
624-
model=model,
625-
task=task,
626-
monolith=False,
627-
custom_export_configs=custom_export_configs if custom_export_configs is not None else {},
628-
custom_architecture=custom_architecture,
629-
fn_get_submodels=fn_get_submodels,
630-
preprocessors=preprocessors,
631-
library_name=library_name,
632-
model_kwargs=model_kwargs,
633-
_variant="default",
634-
legacy=False,
635-
exporter="openvino",
636-
stateful=stateful,
637-
)
638-
logging.disable(logging.NOTSET)
628+
if library_name == "diffusers":
629+
export_config, models_and_export_configs = get_diffusion_models_for_export_ext(model, exporter="openvino")
630+
stateful_submodels = False
631+
else:
632+
logging.disable(logging.INFO)
633+
export_config, models_and_export_configs, stateful_submodels = _get_submodels_and_export_configs(
634+
model=model,
635+
task=task,
636+
monolith=False,
637+
custom_export_configs=custom_export_configs if custom_export_configs is not None else {},
638+
custom_architecture=custom_architecture,
639+
fn_get_submodels=fn_get_submodels,
640+
preprocessors=preprocessors,
641+
library_name=library_name,
642+
model_kwargs=model_kwargs,
643+
_variant="default",
644+
legacy=False,
645+
exporter="openvino",
646+
stateful=stateful,
647+
)
648+
logging.disable(logging.NOTSET)
639649

640650
if library_name == "open_clip":
641651
if hasattr(model.config, "save_pretrained"):
@@ -701,6 +711,10 @@ def export_from_model(
701711
if tokenizer_2 is not None:
702712
tokenizer_2.save_pretrained(output.joinpath("tokenizer_2"))
703713

714+
tokenizer_3 = getattr(model, "tokenizer_3", None)
715+
if tokenizer_3 is not None:
716+
tokenizer_3.save_pretrained(output.joinpath("tokenizer_3"))
717+
704718
model.save_config(output)
705719

706720
export_models(
@@ -889,3 +903,218 @@ def _get_submodels_and_export_configs(
889903
)
890904
stateful_per_model = [stateful] * len(models_for_export)
891905
return export_config, models_for_export, stateful_per_model
906+
907+
908+
def get_diffusion_models_for_export_ext(
    pipeline: "DiffusionPipeline", int_dtype: str = "int64", float_dtype: str = "fp32", exporter: str = "openvino"
):
    """Dispatch submodel collection for a diffusers pipeline.

    SD3 and Flux pipelines need dedicated submodel splitting; every other
    pipeline falls back to the generic diffusers export path.

    Returns a ``(export_config, models_for_export)`` pair; the first element
    is always ``None`` for this extended entry point.
    """
    sd3_detected = False
    try:
        from diffusers import (
            StableDiffusion3Img2ImgPipeline,
            StableDiffusion3InpaintPipeline,
            StableDiffusion3Pipeline,
        )
    except ImportError:
        # Installed diffusers version predates SD3 support.
        pass
    else:
        sd3_classes = (StableDiffusion3Pipeline, StableDiffusion3InpaintPipeline, StableDiffusion3Img2ImgPipeline)
        sd3_detected = isinstance(pipeline, sd3_classes)

    flux_detected = False
    try:
        from diffusers import FluxPipeline
    except ImportError:
        # Installed diffusers version predates Flux support.
        pass
    else:
        flux_detected = isinstance(pipeline, FluxPipeline)

    if sd3_detected:
        submodels = get_sd3_models_for_export(pipeline, exporter, int_dtype, float_dtype)
    elif flux_detected:
        submodels = get_flux_models_for_export(pipeline, exporter, int_dtype, float_dtype)
    else:
        # Generic diffusers pipelines (SD 1.x/2.x, SDXL, ...) use the stock helper.
        submodels = get_diffusion_models_for_export(pipeline, int_dtype, float_dtype, exporter)

    return None, submodels
939+
940+
941+
def get_sd3_models_for_export(pipeline, exporter, int_dtype, float_dtype):
    """Split a Stable Diffusion 3 pipeline into its exportable submodels.

    Returns a dict mapping submodel name to ``(model, export_config)``, in
    export order: text_encoder, transformer, vae_encoder, vae_decoder,
    text_encoder_2, text_encoder_3 (optional encoders are skipped when the
    pipeline does not carry them).
    """

    def _build_export_config(model, task, model_type):
        # Shared shortcut: resolve the exporter config class and instantiate it.
        constructor = TasksManager.get_exporter_config_constructor(
            model=model,
            exporter=exporter,
            library_name="diffusers",
            task=task,
            model_type=model_type,
        )
        return constructor(model.config, int_dtype=int_dtype, float_dtype=float_dtype)

    submodels = {}

    # First CLIP text encoder (optional); hidden-state outputs are enabled
    # before export so they appear in the exported model's outputs.
    clip_encoder = getattr(pipeline, "text_encoder", None)
    if clip_encoder is not None:
        clip_encoder.config.output_hidden_states = True
        clip_encoder.text_model.config.output_hidden_states = True
        submodels["text_encoder"] = (
            clip_encoder,
            _build_export_config(clip_encoder, "feature-extraction", "clip-text-with-projection"),
        )

    # Diffusion transformer; patch the config fields the export config reads.
    transformer = pipeline.transformer
    transformer.config.text_encoder_projection_dim = transformer.config.joint_attention_dim
    transformer.config.requires_aesthetics_score = getattr(pipeline.config, "requires_aesthetics_score", False)
    transformer.config.time_cond_proj_dim = None
    submodels["transformer"] = (
        transformer,
        _build_export_config(transformer, "semantic-segmentation", "sd3-transformer"),
    )

    # VAE encoder: exported as a deep copy whose forward returns the raw
    # latent-distribution parameters rather than a distribution object.
    vae_enc = copy.deepcopy(pipeline.vae)
    vae_enc.forward = lambda sample: {"latent_parameters": vae_enc.encode(x=sample)["latent_dist"].parameters}
    submodels["vae_encoder"] = (
        vae_enc,
        _build_export_config(vae_enc, "semantic-segmentation", "vae-encoder"),
    )

    # VAE decoder: deep copy whose forward decodes latents directly.
    vae_dec = copy.deepcopy(pipeline.vae)
    vae_dec.forward = lambda latent_sample: vae_dec.decode(z=latent_sample)
    submodels["vae_decoder"] = (
        vae_dec,
        _build_export_config(vae_dec, "semantic-segmentation", "vae-decoder"),
    )

    # Second CLIP text encoder (optional), also with hidden states enabled.
    clip_encoder_2 = getattr(pipeline, "text_encoder_2", None)
    if clip_encoder_2 is not None:
        clip_encoder_2.config.output_hidden_states = True
        clip_encoder_2.text_model.config.output_hidden_states = True
        submodels["text_encoder_2"] = (
            clip_encoder_2,
            _build_export_config(clip_encoder_2, "feature-extraction", "clip-text-with-projection"),
        )

    # Optional T5 text encoder.
    t5_encoder = getattr(pipeline, "text_encoder_3", None)
    if t5_encoder is not None:
        submodels["text_encoder_3"] = (
            t5_encoder,
            _build_export_config(t5_encoder, "feature-extraction", "t5-encoder-model"),
        )

    return submodels
1038+
1039+
1040+
def get_flux_models_for_export(pipeline, exporter, int_dtype, float_dtype):
    """Split a Flux pipeline into its exportable submodels.

    Returns a dict mapping submodel name to ``(model, export_config)``, in
    export order: text_encoder, transformer, vae_encoder, vae_decoder,
    text_encoder_2 (optional encoders are skipped when the pipeline does not
    carry them).
    """

    def _build_export_config(model, task, model_type):
        # Shared shortcut: resolve the exporter config class and instantiate it.
        constructor = TasksManager.get_exporter_config_constructor(
            model=model,
            exporter=exporter,
            library_name="diffusers",
            task=task,
            model_type=model_type,
        )
        return constructor(model.config, int_dtype=int_dtype, float_dtype=float_dtype)

    submodels = {}

    # CLIP text encoder (optional).
    clip_encoder = getattr(pipeline, "text_encoder", None)
    if clip_encoder is not None:
        submodels["text_encoder"] = (
            clip_encoder,
            _build_export_config(clip_encoder, "feature-extraction", "clip-text-model"),
        )

    # Diffusion transformer; patch the config fields the export config reads.
    transformer = pipeline.transformer
    transformer.config.text_encoder_projection_dim = transformer.config.joint_attention_dim
    transformer.config.requires_aesthetics_score = getattr(pipeline.config, "requires_aesthetics_score", False)
    transformer.config.time_cond_proj_dim = None
    submodels["transformer"] = (
        transformer,
        _build_export_config(transformer, "semantic-segmentation", "flux-transformer"),
    )

    # VAE encoder: exported as a deep copy whose forward returns the raw
    # latent-distribution parameters rather than a distribution object.
    vae_enc = copy.deepcopy(pipeline.vae)
    vae_enc.forward = lambda sample: {"latent_parameters": vae_enc.encode(x=sample)["latent_dist"].parameters}
    submodels["vae_encoder"] = (
        vae_enc,
        _build_export_config(vae_enc, "semantic-segmentation", "vae-encoder"),
    )

    # VAE decoder: deep copy whose forward decodes latents directly.
    vae_dec = copy.deepcopy(pipeline.vae)
    vae_dec.forward = lambda latent_sample: vae_dec.decode(z=latent_sample)
    submodels["vae_decoder"] = (
        vae_dec,
        _build_export_config(vae_dec, "semantic-segmentation", "vae-decoder"),
    )

    # Optional T5 text encoder (Flux uses it as the second text encoder).
    t5_encoder = getattr(pipeline, "text_encoder_2", None)
    if t5_encoder is not None:
        submodels["text_encoder_2"] = (
            t5_encoder,
            _build_export_config(t5_encoder, "feature-extraction", "t5-encoder-model"),
        )

    return submodels

0 commit comments

Comments
 (0)