|
28 | 28 | from openvino.tools.ovc import convert_model
|
29 | 29 | from optimum.exporters import TasksManager
|
30 | 30 | from optimum.exporters.utils import (
|
31 |
| - _get_submodels_and_export_configs as _default_get_submodels_and_export_configs, |
| 31 | + DECODER_NAME, |
| 32 | + ENCODER_NAME, |
| 33 | + _get_submodels_for_export_encoder_decoder, |
| 34 | + get_diffusion_models_for_export, |
32 | 35 | )
|
33 | 36 | from optimum.exporters.utils import (
|
34 |
| - get_diffusion_models_for_export, |
| 37 | + _get_submodels_and_export_configs as _default_get_submodels_and_export_configs, |
35 | 38 | )
|
36 | 39 | from optimum.intel.utils.import_utils import (
|
37 | 40 | _diffusers_version,
|
@@ -103,10 +106,16 @@ def _set_runtime_options(
|
103 | 106 | _, sub_export_config = models_and_export_configs[model_name]
|
104 | 107 | if not hasattr(sub_export_config, "runtime_options"):
|
105 | 108 | sub_export_config.runtime_options = {}
|
106 |
| - if "text-generation" in task or ("image-text-to-text" in task and model_name == "language_model"): |
| 109 | + if ( |
| 110 | + "text-generation" in task |
| 111 | + or ("image-text-to-text" in task and model_name == "language_model") |
| 112 | + or getattr(sub_export_config, "stateful", False) |
| 113 | + ): |
107 | 114 | sub_export_config.runtime_options["ACTIVATIONS_SCALE_FACTOR"] = "8.0"
|
108 | 115 | if not quantized_model and (
|
109 |
| - "text-generation" in task or ("image-text-to-text" in task and model_name == "language_model") |
| 116 | + "text-generation" in task |
| 117 | + or ("image-text-to-text" in task and model_name == "language_model") |
| 118 | + or getattr(sub_export_config, "stateful", False) |
110 | 119 | ):
|
111 | 120 | sub_export_config.runtime_options["KV_CACHE_PRECISION"] = "f16"
|
112 | 121 |
|
@@ -639,10 +648,14 @@ def export_from_model(
|
639 | 648 |
|
640 | 649 | logger.info(f"Automatic task detection to: {task}.")
|
641 | 650 |
|
| 651 | + is_encoder_decoder = getattr(getattr(model, "config", {}), "is_encoder_decoder", False) |
| 652 | + model_type = getattr(getattr(model, "config", {}), "model_type", "") |
642 | 653 | stateful = stateful and (
|
643 |
| - ensure_export_task_support_stateful(task) |
644 |
| - or ensure_model_type_support_stateful(getattr(getattr(model, "config", {}), "model_type", "")) |
| 654 | + ensure_export_task_support_stateful(task) or ensure_model_type_support_stateful(model_type) |
645 | 655 | )
|
| 656 | + |
| 657 | + if stateful and is_encoder_decoder and not getattr(model, "_supports_cache_class", False): |
| 658 | + stateful = False |
646 | 659 | # TODO: support onnx_config.py in the model repo
|
647 | 660 | if custom_architecture and custom_export_configs is None:
|
648 | 661 | raise ValueError(
|
@@ -684,6 +697,11 @@ def export_from_model(
|
684 | 697 | if library_name == "diffusers":
|
685 | 698 | export_config, models_and_export_configs = get_diffusion_models_for_export_ext(model, exporter="openvino")
|
686 | 699 | stateful_submodels = False
|
| 700 | + elif stateful and is_encoder_decoder and not custom_architecture: |
| 701 | + export_config, models_and_export_configs = _get_encoder_decoder_stateful_models_for_export( |
| 702 | + model=model, task=task, preprocessors=preprocessors, library_name=library_name, _variant="default" |
| 703 | + ) |
| 704 | + stateful_submodels = [False, True] |
687 | 705 | else:
|
688 | 706 | logging.disable(logging.INFO)
|
689 | 707 | export_config, models_and_export_configs, stateful_submodels = _get_submodels_and_export_configs(
|
@@ -1204,3 +1222,42 @@ def get_flux_models_for_export(pipeline, exporter, int_dtype, float_dtype):
|
1204 | 1222 | models_for_export["text_encoder_2"] = (text_encoder_2, export_config)
|
1205 | 1223 |
|
1206 | 1224 | return models_for_export
|
| 1225 | + |
| 1226 | + |
def _get_encoder_decoder_stateful_models_for_export(
    model: Union["PreTrainedModel", "TFPreTrainedModel"],
    task: str,
    _variant: str,
    library_name: str,
    int_dtype: str = "int64",
    float_dtype: str = "fp32",
    preprocessors: Optional[List[Any]] = None,
):
    """
    Pair the encoder and decoder submodels of `model` with their OpenVINO export configs,
    marking the decoder config as stateful.

    Args:
        model: The encoder-decoder model to split; its `config` attribute is passed to the
            export config constructor.
        task: Task name forwarded to `TasksManager.get_exporter_config_constructor`.
        _variant: Value assigned to the `variant` attribute of the export config.
        library_name: Library name forwarded to `TasksManager.get_exporter_config_constructor`.
        int_dtype: Forwarded to the export config constructor.
        float_dtype: Forwarded to the export config constructor.
        preprocessors: Forwarded to the export config constructor.

    Returns:
        A `(None, models_for_export)` tuple. The first element is always `None`; the second is
        the mapping returned by `_get_submodels_for_export_encoder_decoder`, in which the
        `ENCODER_NAME` and `DECODER_NAME` entries have been replaced by
        `(submodel, export_config)` tuples.
    """
    config_factory = TasksManager.get_exporter_config_constructor(
        model=model, exporter="openvino", task=task, library_name=library_name
    )
    base_config = config_factory(
        model.config,
        int_dtype=int_dtype,
        float_dtype=float_dtype,
        preprocessors=preprocessors,
        legacy=False,
    )
    base_config.variant = _variant

    # List every variant the config declares so the log shows what could have been selected.
    variant_lines = []
    for name, description in base_config.VARIANTS.items():
        variant_lines.append(f" - {name}: {description}")
    all_variants = "\n".join(variant_lines)
    logger.info(f"Using the export variant {base_config.variant}. Available variants are:\n{all_variants}")

    # The submodels are requested with use_past=False; the decoder's past handling is set on
    # its export config below.
    submodels = _get_submodels_for_export_encoder_decoder(model, use_past=False)

    encoder_config = base_config.with_behavior("encoder")
    encoder_module = submodels[ENCODER_NAME]
    submodels[ENCODER_NAME] = (encoder_module, encoder_config)

    stateful_decoder_config = base_config.with_behavior("decoder", use_past=True, use_past_in_inputs=True)
    # Only the decoder config carries the stateful flag.
    stateful_decoder_config.stateful = True
    decoder_module = submodels[DECODER_NAME]
    submodels[DECODER_NAME] = (decoder_module, stateful_decoder_config)

    return None, submodels
0 commit comments