|
28 | 28 | from openvino.tools.ovc import convert_model
|
29 | 29 | from optimum.exporters import TasksManager
|
30 | 30 | from optimum.exporters.utils import (
|
31 |
| - _get_submodels_and_export_configs as _default_get_submodels_and_export_configs, |
| 31 | + DECODER_NAME, |
| 32 | + ENCODER_NAME, |
| 33 | + _get_submodels_for_export_encoder_decoder, |
| 34 | + get_diffusion_models_for_export, |
32 | 35 | )
|
33 | 36 | from optimum.exporters.utils import (
|
34 |
| - get_diffusion_models_for_export, |
| 37 | + _get_submodels_and_export_configs as _default_get_submodels_and_export_configs, |
35 | 38 | )
|
36 | 39 | from optimum.intel.utils.import_utils import (
|
37 | 40 | _diffusers_version,
|
@@ -106,10 +109,13 @@ def _set_runtime_options(
|
106 | 109 | "diffusers" in library_name
|
107 | 110 | or "text-generation" in task
|
108 | 111 | or ("image-text-to-text" in task and model_name == "language_model")
|
| 112 | + or getattr(sub_export_config, "stateful", False) |
109 | 113 | ):
|
110 | 114 | sub_export_config.runtime_options["ACTIVATIONS_SCALE_FACTOR"] = "8.0"
|
111 | 115 | if not quantized_model and (
|
112 |
| - "text-generation" in task or ("image-text-to-text" in task and model_name == "language_model") |
| 116 | + "text-generation" in task |
| 117 | + or ("image-text-to-text" in task and model_name == "language_model") |
| 118 | + or getattr(sub_export_config, "stateful", False) |
113 | 119 | ):
|
114 | 120 | sub_export_config.runtime_options["KV_CACHE_PRECISION"] = "f16"
|
115 | 121 |
|
@@ -642,10 +648,14 @@ def export_from_model(
|
642 | 648 |
|
643 | 649 | logger.info(f"Automatic task detection to: {task}.")
|
644 | 650 |
|
| 651 | + is_encoder_decoder = getattr(getattr(model, "config", {}), "is_encoder_decoder", False) |
| 652 | + model_type = getattr(getattr(model, "config", {}), "model_type", "") |
645 | 653 | stateful = stateful and (
|
646 |
| - ensure_export_task_support_stateful(task) |
647 |
| - or ensure_model_type_support_stateful(getattr(getattr(model, "config", {}), "model_type", "")) |
| 654 | + ensure_export_task_support_stateful(task) or ensure_model_type_support_stateful(model_type) |
648 | 655 | )
|
| 656 | + |
| 657 | + if stateful and is_encoder_decoder and not getattr(model, "_supports_cache_class", False): |
| 658 | + stateful = False |
649 | 659 | # TODO: support onnx_config.py in the model repo
|
650 | 660 | if custom_architecture and custom_export_configs is None:
|
651 | 661 | raise ValueError(
|
@@ -687,6 +697,11 @@ def export_from_model(
|
687 | 697 | if library_name == "diffusers":
|
688 | 698 | export_config, models_and_export_configs = get_diffusion_models_for_export_ext(model, exporter="openvino")
|
689 | 699 | stateful_submodels = False
|
| 700 | + elif stateful and is_encoder_decoder and not custom_architecture: |
| 701 | + export_config, models_and_export_configs = _get_encoder_decoder_stateful_models_for_export( |
| 702 | + model=model, task=task, preprocessors=preprocessors, library_name=library_name, _variant="default" |
| 703 | + ) |
| 704 | + stateful_submodels = [False, True] |
690 | 705 | else:
|
691 | 706 | logging.disable(logging.INFO)
|
692 | 707 | export_config, models_and_export_configs, stateful_submodels = _get_submodels_and_export_configs(
|
@@ -1221,3 +1236,42 @@ def get_flux_models_for_export(pipeline, exporter, int_dtype, float_dtype):
|
1221 | 1236 | models_for_export["text_encoder_2"] = (text_encoder_2, export_config)
|
1222 | 1237 |
|
1223 | 1238 | return models_for_export
|
| 1239 | + |
| 1240 | + |
def _get_encoder_decoder_stateful_models_for_export(
    model: Union["PreTrainedModel", "TFPreTrainedModel"],
    task: str,
    _variant: str,
    library_name: str,
    int_dtype: str = "int64",
    float_dtype: str = "fp32",
    preprocessors: Optional[List[Any]] = None,
):
    """
    Prepare the encoder and decoder submodels of an encoder-decoder model for a stateful export.

    A single export config is built for `model` through `TasksManager` (exporter `"openvino"`,
    `legacy=False`), its variant is set to `_variant`, and it is then specialised per submodel:

    - the encoder is paired with `export_config.with_behavior("encoder")`;
    - the decoder is paired with
      `export_config.with_behavior("decoder", use_past=True, use_past_in_inputs=True)`, and that
      config additionally gets `stateful = True`.

    Args:
        model: The model whose `config` is handed to the export config constructor and whose
            submodels are collected.
        task: Task name forwarded to `TasksManager.get_exporter_config_constructor`.
        _variant: Value assigned to `export_config.variant`.
        library_name: Library name forwarded to `TasksManager.get_exporter_config_constructor`.
        int_dtype: Forwarded to the export config constructor.
        float_dtype: Forwarded to the export config constructor.
        preprocessors: Forwarded to the export config constructor.

    Returns:
        A `(None, models_for_export)` tuple. The first element is always `None`; the second is the
        mapping returned by `_get_submodels_for_export_encoder_decoder` in which the `ENCODER_NAME`
        and `DECODER_NAME` entries have been replaced by `(submodel, export_config)` pairs.
    """
    make_export_config = TasksManager.get_exporter_config_constructor(
        model=model, exporter="openvino", task=task, library_name=library_name
    )
    export_config = make_export_config(
        model.config,
        int_dtype=int_dtype,
        float_dtype=float_dtype,
        preprocessors=preprocessors,
        legacy=False,
    )
    export_config.variant = _variant

    # Report the selected variant next to every variant the config declares.
    variant_lines = []
    for name, description in export_config.VARIANTS.items():
        variant_lines.append(f" - {name}: {description}")
    all_variants = "\n".join(variant_lines)
    logger.info(f"Using the export variant {export_config.variant}. Available variants are:\n{all_variants}")

    # Submodels are requested with use_past=False; the past handling is expressed only through
    # the decoder export config created below.
    submodels = _get_submodels_for_export_encoder_decoder(model, use_past=False)

    encoder_config = export_config.with_behavior("encoder")
    encoder_module = submodels[ENCODER_NAME]
    submodels[ENCODER_NAME] = (encoder_module, encoder_config)

    stateful_decoder_config = export_config.with_behavior("decoder", use_past=True, use_past_in_inputs=True)
    stateful_decoder_config.stateful = True
    decoder_module = submodels[DECODER_NAME]
    submodels[DECODER_NAME] = (decoder_module, stateful_decoder_config)

    return None, submodels
0 commit comments