@@ -28,10 +28,13 @@
 from openvino.tools.ovc import convert_model
 from optimum.exporters import TasksManager
 from optimum.exporters.utils import (
-    _get_submodels_and_export_configs as _default_get_submodels_and_export_configs,
+    DECODER_NAME,
+    ENCODER_NAME,
+    _get_submodels_for_export_encoder_decoder,
+    get_diffusion_models_for_export,
 )
 from optimum.exporters.utils import (
-    get_diffusion_models_for_export,
+    _get_submodels_and_export_configs as _default_get_submodels_and_export_configs,
 )
 from optimum.intel.utils.import_utils import (
     _diffusers_version,
@@ -43,7 +46,6 @@
     _torch_version,
     _transformers_version,
     compare_versions,
-    is_diffusers_version,
     is_openvino_tokenizers_version,
     is_openvino_version,
     is_tokenizers_version,
@@ -101,15 +103,18 @@ def _set_runtime_options(
 ):
     for model_name in models_and_export_configs.keys():
         _, sub_export_config = models_and_export_configs[model_name]
-        sub_export_config.runtime_options = {}
+        if not hasattr(sub_export_config, "runtime_options"):
+            sub_export_config.runtime_options = {}
         if (
-            "diffusers" in library_name
-            or "text-generation" in task
+            "text-generation" in task
             or ("image-text-to-text" in task and model_name == "language_model")
+            or getattr(sub_export_config, "stateful", False)
         ):
             sub_export_config.runtime_options["ACTIVATIONS_SCALE_FACTOR"] = "8.0"
         if not quantized_model and (
-            "text-generation" in task or ("image-text-to-text" in task and model_name == "language_model")
+            "text-generation" in task
+            or ("image-text-to-text" in task and model_name == "language_model")
+            or getattr(sub_export_config, "stateful", False)
         ):
             sub_export_config.runtime_options["KV_CACHE_PRECISION"] = "f16"
 
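Note: the hunk above no longer resets `runtime_options` unconditionally and extends the hints to any submodel whose export config is marked `stateful`. A minimal standalone sketch of that behaviour, using dummy config objects rather than the optimum-intel types (the `set_runtime_options` helper and the example names are illustrative only):

```python
from types import SimpleNamespace

def set_runtime_options(models_and_export_configs, task, quantized_model):
    # Simplified version of the logic above: preserve options a previous step may
    # have attached, then add hints for text-generation or stateful submodels.
    for _model_name, (_, sub_export_config) in models_and_export_configs.items():
        if not hasattr(sub_export_config, "runtime_options"):
            sub_export_config.runtime_options = {}
        is_stateful = getattr(sub_export_config, "stateful", False)
        if "text-generation" in task or is_stateful:
            sub_export_config.runtime_options["ACTIVATIONS_SCALE_FACTOR"] = "8.0"
        if not quantized_model and ("text-generation" in task or is_stateful):
            sub_export_config.runtime_options["KV_CACHE_PRECISION"] = "f16"

decoder_cfg = SimpleNamespace(stateful=True)
set_runtime_options({"decoder": (object(), decoder_cfg)}, task="text2text-generation", quantized_model=False)
print(decoder_cfg.runtime_options)  # {'ACTIVATIONS_SCALE_FACTOR': '8.0', 'KV_CACHE_PRECISION': 'f16'}
```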
@@ -642,10 +647,14 @@ def export_from_model(
 
         logger.info(f"Automatic task detection to: {task}.")
 
+    is_encoder_decoder = getattr(getattr(model, "config", {}), "is_encoder_decoder", False)
+    model_type = getattr(getattr(model, "config", {}), "model_type", "")
     stateful = stateful and (
-        ensure_export_task_support_stateful(task)
-        or ensure_model_type_support_stateful(getattr(getattr(model, "config", {}), "model_type", ""))
+        ensure_export_task_support_stateful(task) or ensure_model_type_support_stateful(model_type)
     )
+
+    if stateful and is_encoder_decoder and not getattr(model, "_supports_cache_class", False):
+        stateful = False
     # TODO: support onnx_config.py in the model repo
     if custom_architecture and custom_export_configs is None:
         raise ValueError(
@@ -687,6 +696,11 @@ def export_from_model(
     if library_name == "diffusers":
         export_config, models_and_export_configs = get_diffusion_models_for_export_ext(model, exporter="openvino")
         stateful_submodels = False
+    elif stateful and is_encoder_decoder and not custom_architecture:
+        export_config, models_and_export_configs = _get_encoder_decoder_stateful_models_for_export(
+            model=model, task=task, preprocessors=preprocessors, library_name=library_name, _variant="default"
+        )
+        stateful_submodels = [False, True]
     else:
         logging.disable(logging.INFO)
         export_config, models_and_export_configs, stateful_submodels = _get_submodels_and_export_configs(
@@ -999,45 +1013,29 @@ def _get_submodels_and_export_configs(
 def get_diffusion_models_for_export_ext(
     pipeline: "DiffusionPipeline", int_dtype: str = "int64", float_dtype: str = "fp32", exporter: str = "openvino"
 ):
-    if is_diffusers_version(">=", "0.29.0"):
-        from diffusers import StableDiffusion3Img2ImgPipeline, StableDiffusion3Pipeline
-
-        sd3_pipes = [StableDiffusion3Pipeline, StableDiffusion3Img2ImgPipeline]
-        if is_diffusers_version(">=", "0.30.0"):
-            from diffusers import StableDiffusion3InpaintPipeline
-
-            sd3_pipes.append(StableDiffusion3InpaintPipeline)
-
-        is_sd3 = isinstance(pipeline, tuple(sd3_pipes))
-    else:
-        is_sd3 = False
-
-    if is_diffusers_version(">=", "0.30.0"):
-        from diffusers import FluxPipeline
-
-        flux_pipes = [FluxPipeline]
-
-        if is_diffusers_version(">=", "0.31.0"):
-            from diffusers import FluxImg2ImgPipeline, FluxInpaintPipeline
-
-            flux_pipes.extend([FluxPipeline, FluxImg2ImgPipeline, FluxInpaintPipeline])
-
-        if is_diffusers_version(">=", "0.32.0"):
-            from diffusers import FluxFillPipeline
-
-            flux_pipes.append(FluxFillPipeline)
-
-        is_flux = isinstance(pipeline, tuple(flux_pipes))
-    else:
-        is_flux = False
-
-    if not is_sd3 and not is_flux:
-        return None, get_diffusion_models_for_export(pipeline, int_dtype, float_dtype, exporter)
-    if is_sd3:
+    is_sdxl = pipeline.__class__.__name__.startswith("StableDiffusionXL")
+    is_sd3 = pipeline.__class__.__name__.startswith("StableDiffusion3")
+    is_flux = pipeline.__class__.__name__.startswith("Flux")
+    is_sd = pipeline.__class__.__name__.startswith("StableDiffusion") and not is_sd3
+    is_lcm = pipeline.__class__.__name__.startswith("LatentConsistencyModel")
+
+    if is_sd or is_sdxl or is_lcm:
+        models_for_export = get_diffusion_models_for_export(pipeline, int_dtype, float_dtype, exporter)
+        if is_sdxl and pipeline.vae.config.force_upcast:
+            models_for_export["vae_encoder"][1].runtime_options = {"ACTIVATIONS_SCALE_FACTOR": "128.0"}
+            models_for_export["vae_decoder"][1].runtime_options = {"ACTIVATIONS_SCALE_FACTOR": "128.0"}
+
+        # only SD 2.1 has overflow issue, it uses different prediction_type than other models
+        if is_sd and pipeline.scheduler.config.prediction_type == "v_prediction":
+            models_for_export["vae_encoder"][1].runtime_options = {"ACTIVATIONS_SCALE_FACTOR": "8.0"}
+            models_for_export["vae_decoder"][1].runtime_options = {"ACTIVATIONS_SCALE_FACTOR": "8.0"}
+
+    elif is_sd3:
         models_for_export = get_sd3_models_for_export(pipeline, exporter, int_dtype, float_dtype)
-    else:
+    elif is_flux:
         models_for_export = get_flux_models_for_export(pipeline, exporter, int_dtype, float_dtype)
-
+    else:
+        raise ValueError(f"Unsupported pipeline type `{pipeline.__class__.__name__}` provided")
     return None, models_for_export
 
 
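For reference, the rewritten dispatch above keys off the pipeline class name instead of `is_diffusers_version`-gated `isinstance` checks, and unknown pipelines now raise instead of silently falling through. A rough, self-contained illustration with stand-in classes (not the actual diffusers pipelines):

```python
class StableDiffusionXLPipeline: ...
class StableDiffusion3Pipeline: ...
class FluxFillPipeline: ...

def classify(pipeline):
    # Prefix checks equivalent in effect to the is_sd3/is_flux/is_sd/is_sdxl/is_lcm
    # flags computed above; new pipeline variants match by prefix without version gates.
    name = pipeline.__class__.__name__
    if name.startswith("StableDiffusion3"):
        return "sd3"
    if name.startswith("Flux"):
        return "flux"
    if name.startswith(("StableDiffusionXL", "StableDiffusion", "LatentConsistencyModel")):
        return "sd-family"
    raise ValueError(f"Unsupported pipeline type `{name}` provided")

print(classify(StableDiffusion3Pipeline()))   # sd3
print(classify(FluxFillPipeline()))           # flux
print(classify(StableDiffusionXLPipeline()))  # sd-family
```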
@@ -1135,6 +1133,7 @@ def get_sd3_models_for_export(pipeline, exporter, int_dtype, float_dtype):
             int_dtype=int_dtype,
             float_dtype=float_dtype,
         )
+        export_config.runtime_options = {"ACTIVATIONS_SCALE_FACTOR": "8.0"}
         models_for_export["text_encoder_3"] = (text_encoder_3, export_config)
 
     return models_for_export
@@ -1172,6 +1171,7 @@ def get_flux_models_for_export(pipeline, exporter, int_dtype, float_dtype):
     transformer_export_config = export_config_constructor(
         pipeline.transformer.config, int_dtype=int_dtype, float_dtype=float_dtype
     )
+    transformer_export_config.runtime_options = {"ACTIVATIONS_SCALE_FACTOR": "8.0"}
     models_for_export["transformer"] = (transformer, transformer_export_config)
 
     # VAE Encoder https://github.com/huggingface/diffusers/blob/v0.11.1/src/diffusers/models/vae.py#L565
@@ -1187,6 +1187,7 @@ def get_flux_models_for_export(pipeline, exporter, int_dtype, float_dtype):
     vae_encoder_export_config = vae_config_constructor(
         vae_encoder.config, int_dtype=int_dtype, float_dtype=float_dtype
     )
+    vae_encoder_export_config.runtime_options = {"ACTIVATIONS_SCALE_FACTOR": "8.0"}
     models_for_export["vae_encoder"] = (vae_encoder, vae_encoder_export_config)
 
     # VAE Decoder https://github.com/huggingface/diffusers/blob/v0.11.1/src/diffusers/models/vae.py#L600
@@ -1202,6 +1203,7 @@ def get_flux_models_for_export(pipeline, exporter, int_dtype, float_dtype):
     vae_decoder_export_config = vae_config_constructor(
         vae_decoder.config, int_dtype=int_dtype, float_dtype=float_dtype
     )
+    vae_decoder_export_config.runtime_options = {"ACTIVATIONS_SCALE_FACTOR": "8.0"}
     models_for_export["vae_decoder"] = (vae_decoder, vae_decoder_export_config)
 
     text_encoder_2 = getattr(pipeline, "text_encoder_2", None)
@@ -1218,6 +1220,46 @@ def get_flux_models_for_export(pipeline, exporter, int_dtype, float_dtype):
             int_dtype=int_dtype,
             float_dtype=float_dtype,
         )
+        export_config.runtime_options = {"ACTIVATIONS_SCALE_FACTOR": "8.0"}
         models_for_export["text_encoder_2"] = (text_encoder_2, export_config)
 
     return models_for_export
+
+
+def _get_encoder_decoder_stateful_models_for_export(
+    model: Union["PreTrainedModel", "TFPreTrainedModel"],
+    task: str,
+    _variant: str,
+    library_name: str,
+    int_dtype: str = "int64",
+    float_dtype: str = "fp32",
+    preprocessors: Optional[List[Any]] = None,
+):
+    export_config_constructor = TasksManager.get_exporter_config_constructor(
+        model=model, exporter="openvino", task=task, library_name=library_name
+    )
+    export_config = export_config_constructor(
+        model.config,
+        int_dtype=int_dtype,
+        float_dtype=float_dtype,
+        preprocessors=preprocessors,
+        legacy=False,
+    )
+
+    export_config.variant = _variant
+    all_variants = "\n".join([f"    - {name}: {description}" for name, description in export_config.VARIANTS.items()])
+    logger.info(f"Using the export variant {export_config.variant}. Available variants are:\n{all_variants}")
+
+    models_for_export = _get_submodels_for_export_encoder_decoder(model, use_past=False)
+
+    encoder_export_config = export_config.with_behavior("encoder")
+    models_for_export[ENCODER_NAME] = (models_for_export[ENCODER_NAME], encoder_export_config)
+
+    decoder_export_config_with_past = export_config.with_behavior("decoder", use_past=True, use_past_in_inputs=True)
+
+    decoder_export_config_with_past.stateful = True
+    models_for_export[DECODER_NAME] = (
+        models_for_export[DECODER_NAME],
+        decoder_export_config_with_past,
+    )
+    return None, models_for_export
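The new `_get_encoder_decoder_stateful_models_for_export` exports only two subgraphs: a plain encoder and a single decoder with past-key-value inputs enabled and `stateful = True` (no separate `decoder_with_past`), which is why the caller reports `stateful_submodels = [False, True]`. A minimal sketch of the returned structure, assuming `ENCODER_NAME`/`DECODER_NAME` resolve to the strings "encoder"/"decoder" and using placeholders for the submodels and export configs:

```python
from types import SimpleNamespace

# Placeholder submodels/configs standing in for what optimum would build.
encoder_submodel, decoder_submodel = object(), object()
encoder_config = SimpleNamespace(stateful=False)
decoder_config = SimpleNamespace(use_past=True, use_past_in_inputs=True, stateful=True)

export_config, models_for_export = None, {
    "encoder": (encoder_submodel, encoder_config),
    "decoder": (decoder_submodel, decoder_config),
}

# Downstream, _set_runtime_options sees decoder_config.stateful == True and attaches
# ACTIVATIONS_SCALE_FACTOR / KV_CACHE_PRECISION to the decoder only.
assert [getattr(cfg, "stateful", False) for _, cfg in models_for_export.values()] == [False, True]
```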