|
12 | 12 | # See the License for the specific language governing permissions and
|
13 | 13 | # limitations under the License.
|
14 | 14 |
|
| 15 | +import copy |
15 | 16 | import functools
|
16 | 17 | import gc
|
17 | 18 | import logging
|
|
31 | 32 | from optimum.exporters.onnx.convert import check_dummy_inputs_are_allowed
|
32 | 33 | from optimum.exporters.onnx.convert import export_pytorch as export_pytorch_to_onnx
|
33 | 34 | from optimum.exporters.onnx.convert import export_tensorflow as export_tensorflow_onnx
|
34 |
| -from optimum.exporters.utils import _get_submodels_and_export_configs as _default_get_submodels_and_export_configs |
| 35 | +from optimum.exporters.utils import ( |
| 36 | + _get_submodels_and_export_configs as _default_get_submodels_and_export_configs, |
| 37 | +) |
| 38 | +from optimum.exporters.utils import ( |
| 39 | + get_diffusion_models_for_export, |
| 40 | +) |
35 | 41 | from optimum.intel.utils.import_utils import (
|
36 | 42 | _nncf_version,
|
37 | 43 | _open_clip_version,
|
@@ -619,23 +625,27 @@ def export_from_model(
|
619 | 625 | model, library_name, task, preprocessors, custom_export_configs, fn_get_submodels
|
620 | 626 | )
|
621 | 627 |
|
622 |
| - logging.disable(logging.INFO) |
623 |
| - export_config, models_and_export_configs, stateful_submodels = _get_submodels_and_export_configs( |
624 |
| - model=model, |
625 |
| - task=task, |
626 |
| - monolith=False, |
627 |
| - custom_export_configs=custom_export_configs if custom_export_configs is not None else {}, |
628 |
| - custom_architecture=custom_architecture, |
629 |
| - fn_get_submodels=fn_get_submodels, |
630 |
| - preprocessors=preprocessors, |
631 |
| - library_name=library_name, |
632 |
| - model_kwargs=model_kwargs, |
633 |
| - _variant="default", |
634 |
| - legacy=False, |
635 |
| - exporter="openvino", |
636 |
| - stateful=stateful, |
637 |
| - ) |
638 |
| - logging.disable(logging.NOTSET) |
| 628 | + if library_name == "diffusers": |
| 629 | + export_config, models_and_export_configs = get_diffusion_models_for_export_ext(model, exporter="openvino") |
| 630 | + stateful_submodels = False |
| 631 | + else: |
| 632 | + logging.disable(logging.INFO) |
| 633 | + export_config, models_and_export_configs, stateful_submodels = _get_submodels_and_export_configs( |
| 634 | + model=model, |
| 635 | + task=task, |
| 636 | + monolith=False, |
| 637 | + custom_export_configs=custom_export_configs if custom_export_configs is not None else {}, |
| 638 | + custom_architecture=custom_architecture, |
| 639 | + fn_get_submodels=fn_get_submodels, |
| 640 | + preprocessors=preprocessors, |
| 641 | + library_name=library_name, |
| 642 | + model_kwargs=model_kwargs, |
| 643 | + _variant="default", |
| 644 | + legacy=False, |
| 645 | + exporter="openvino", |
| 646 | + stateful=stateful, |
| 647 | + ) |
| 648 | + logging.disable(logging.NOTSET) |
639 | 649 |
|
640 | 650 | if library_name == "open_clip":
|
641 | 651 | if hasattr(model.config, "save_pretrained"):
|
@@ -701,6 +711,10 @@ def export_from_model(
|
701 | 711 | if tokenizer_2 is not None:
|
702 | 712 | tokenizer_2.save_pretrained(output.joinpath("tokenizer_2"))
|
703 | 713 |
|
| 714 | + tokenizer_3 = getattr(model, "tokenizer_3", None) |
| 715 | + if tokenizer_3 is not None: |
| 716 | + tokenizer_3.save_pretrained(output.joinpath("tokenizer_3")) |
| 717 | + |
704 | 718 | model.save_config(output)
|
705 | 719 |
|
706 | 720 | export_models(
|
@@ -889,3 +903,218 @@ def _get_submodels_and_export_configs(
|
889 | 903 | )
|
890 | 904 | stateful_per_model = [stateful] * len(models_for_export)
|
891 | 905 | return export_config, models_for_export, stateful_per_model
|
| 906 | + |
| 907 | + |
def get_diffusion_models_for_export_ext(
    pipeline: "DiffusionPipeline", int_dtype: str = "int64", float_dtype: str = "fp32", exporter: str = "openvino"
):
    """Collect the export configs for every submodel of a diffusion pipeline.

    Stable Diffusion 3 and Flux pipelines are dispatched to dedicated
    builders; every other pipeline class falls back to the stock
    ``get_diffusion_models_for_export``. Returns ``(None, models_for_export)``
    where ``models_for_export`` maps submodel names to
    ``(model, export_config)`` pairs.
    """
    # Probe for SD3 pipeline classes; older diffusers releases do not ship them.
    sd3_detected = False
    try:
        from diffusers import (
            StableDiffusion3Img2ImgPipeline,
            StableDiffusion3InpaintPipeline,
            StableDiffusion3Pipeline,
        )
    except ImportError:
        pass
    else:
        sd3_detected = isinstance(
            pipeline, (StableDiffusion3Pipeline, StableDiffusion3InpaintPipeline, StableDiffusion3Img2ImgPipeline)
        )

    # Probe for the Flux pipeline class, likewise absent from older releases.
    flux_detected = False
    try:
        from diffusers import FluxPipeline
    except ImportError:
        pass
    else:
        flux_detected = isinstance(pipeline, FluxPipeline)

    if sd3_detected:
        submodels = get_sd3_models_for_export(pipeline, exporter, int_dtype, float_dtype)
    elif flux_detected:
        submodels = get_flux_models_for_export(pipeline, exporter, int_dtype, float_dtype)
    else:
        submodels = get_diffusion_models_for_export(pipeline, int_dtype, float_dtype, exporter)

    return None, submodels
| 939 | + |
| 940 | + |
def get_sd3_models_for_export(pipeline, exporter, int_dtype, float_dtype):
    """Build the ``{name: (submodel, export_config)}`` map for a Stable Diffusion 3 pipeline.

    The VAE is deep-copied twice so the encode and decode paths can be
    exported as independent models with dedicated ``forward`` implementations;
    hidden-state output is enabled on both CLIP text encoders, which SD3
    consumes at inference time.
    """

    def _make_config(model, task, model_type):
        # Resolve the exporter config class for this submodel and instantiate it.
        constructor = TasksManager.get_exporter_config_constructor(
            model=model,
            exporter=exporter,
            library_name="diffusers",
            task=task,
            model_type=model_type,
        )
        return constructor(model.config, int_dtype=int_dtype, float_dtype=float_dtype)

    exported = {}

    # Primary CLIP text encoder (with projection), when present.
    clip_encoder = getattr(pipeline, "text_encoder", None)
    if clip_encoder is not None:
        clip_encoder.config.output_hidden_states = True
        clip_encoder.text_model.config.output_hidden_states = True
        exported["text_encoder"] = (
            clip_encoder,
            _make_config(clip_encoder, "feature-extraction", "clip-text-with-projection"),
        )

    # MMDiT transformer: patch the config fields the export config expects to find.
    transformer = pipeline.transformer
    transformer.config.text_encoder_projection_dim = transformer.config.joint_attention_dim
    transformer.config.requires_aesthetics_score = getattr(pipeline.config, "requires_aesthetics_score", False)
    transformer.config.time_cond_proj_dim = None
    exported["transformer"] = (
        transformer,
        _make_config(transformer, "semantic-segmentation", "sd3-transformer"),
    )

    # VAE encoder: export only the encode path, surfacing the latent-distribution parameters.
    # https://github.com/huggingface/diffusers/blob/v0.11.1/src/diffusers/models/vae.py#L565
    vae_encoder = copy.deepcopy(pipeline.vae)
    vae_encoder.forward = lambda sample: {"latent_parameters": vae_encoder.encode(x=sample)["latent_dist"].parameters}
    exported["vae_encoder"] = (
        vae_encoder,
        _make_config(vae_encoder, "semantic-segmentation", "vae-encoder"),
    )

    # VAE decoder: export only the decode path.
    # https://github.com/huggingface/diffusers/blob/v0.11.1/src/diffusers/models/vae.py#L600
    vae_decoder = copy.deepcopy(pipeline.vae)
    vae_decoder.forward = lambda latent_sample: vae_decoder.decode(z=latent_sample)
    exported["vae_decoder"] = (
        vae_decoder,
        _make_config(vae_decoder, "semantic-segmentation", "vae-decoder"),
    )

    # Secondary CLIP text encoder (with projection), when present.
    clip_encoder_2 = getattr(pipeline, "text_encoder_2", None)
    if clip_encoder_2 is not None:
        clip_encoder_2.config.output_hidden_states = True
        clip_encoder_2.text_model.config.output_hidden_states = True
        exported["text_encoder_2"] = (
            clip_encoder_2,
            _make_config(clip_encoder_2, "feature-extraction", "clip-text-with-projection"),
        )

    # Optional T5 text encoder.
    t5_encoder = getattr(pipeline, "text_encoder_3", None)
    if t5_encoder is not None:
        exported["text_encoder_3"] = (
            t5_encoder,
            _make_config(t5_encoder, "feature-extraction", "t5-encoder-model"),
        )

    return exported
| 1038 | + |
| 1039 | + |
def get_flux_models_for_export(pipeline, exporter, int_dtype, float_dtype):
    """Build the ``{name: (submodel, export_config)}`` map for a Flux pipeline.

    The VAE is deep-copied twice so the encode and decode paths can be
    exported as independent models with dedicated ``forward`` implementations.
    """

    def _make_config(model, task, model_type):
        # Resolve the exporter config class for this submodel and instantiate it.
        constructor = TasksManager.get_exporter_config_constructor(
            model=model,
            exporter=exporter,
            library_name="diffusers",
            task=task,
            model_type=model_type,
        )
        return constructor(model.config, int_dtype=int_dtype, float_dtype=float_dtype)

    exported = {}

    # CLIP text encoder, when present.
    clip_encoder = getattr(pipeline, "text_encoder", None)
    if clip_encoder is not None:
        exported["text_encoder"] = (
            clip_encoder,
            _make_config(clip_encoder, "feature-extraction", "clip-text-model"),
        )

    # Flux transformer: patch the config fields the export config expects to find.
    transformer = pipeline.transformer
    transformer.config.text_encoder_projection_dim = transformer.config.joint_attention_dim
    transformer.config.requires_aesthetics_score = getattr(pipeline.config, "requires_aesthetics_score", False)
    transformer.config.time_cond_proj_dim = None
    exported["transformer"] = (
        transformer,
        _make_config(transformer, "semantic-segmentation", "flux-transformer"),
    )

    # VAE encoder: export only the encode path, surfacing the latent-distribution parameters.
    # https://github.com/huggingface/diffusers/blob/v0.11.1/src/diffusers/models/vae.py#L565
    vae_encoder = copy.deepcopy(pipeline.vae)
    vae_encoder.forward = lambda sample: {"latent_parameters": vae_encoder.encode(x=sample)["latent_dist"].parameters}
    exported["vae_encoder"] = (
        vae_encoder,
        _make_config(vae_encoder, "semantic-segmentation", "vae-encoder"),
    )

    # VAE decoder: export only the decode path.
    # https://github.com/huggingface/diffusers/blob/v0.11.1/src/diffusers/models/vae.py#L600
    vae_decoder = copy.deepcopy(pipeline.vae)
    vae_decoder.forward = lambda latent_sample: vae_decoder.decode(z=latent_sample)
    exported["vae_decoder"] = (
        vae_decoder,
        _make_config(vae_decoder, "semantic-segmentation", "vae-decoder"),
    )

    # Optional T5 text encoder (Flux's second text encoder).
    t5_encoder = getattr(pipeline, "text_encoder_2", None)
    if t5_encoder is not None:
        exported["text_encoder_2"] = (
            t5_encoder,
            _make_config(t5_encoder, "feature-extraction", "t5-encoder-model"),
        )

    return exported
0 commit comments