|
29 | 29 | from optimum.exporters import TasksManager
|
30 | 30 | from optimum.exporters.utils import (
|
31 | 31 | DECODER_NAME,
|
32 |
| - DECODER_WITH_PAST_NAME, |
33 | 32 | ENCODER_NAME,
|
34 | 33 | _get_submodels_for_export_encoder_decoder,
|
35 | 34 | get_diffusion_models_for_export,
|
|
48 | 47 | _transformers_version,
|
49 | 48 | compare_versions,
|
50 | 49 | is_diffusers_version,
|
51 |
| - is_openvino_version, |
52 | 50 | is_openvino_tokenizers_version,
|
53 | 51 | is_tokenizers_version,
|
54 | 52 | is_transformers_version,
|
@@ -110,10 +108,13 @@ def _set_runtime_options(
|
110 | 108 | "diffusers" in library_name
|
111 | 109 | or "text-generation" in task
|
112 | 110 | or ("image-text-to-text" in task and model_name == "language_model")
|
| 111 | + or getattr(sub_export_config, "stateful", False) |
113 | 112 | ):
|
114 | 113 | sub_export_config.runtime_options["ACTIVATIONS_SCALE_FACTOR"] = "8.0"
|
115 | 114 | if not quantized_model and (
|
116 |
| - "text-generation" in task or ("image-text-to-text" in task and model_name == "language_model") |
| 115 | + "text-generation" in task |
| 116 | + or ("image-text-to-text" in task and model_name == "language_model") |
| 117 | + or getattr(sub_export_config, "stateful", False) |
117 | 118 | ):
|
118 | 119 | sub_export_config.runtime_options["KV_CACHE_PRECISION"] = "f16"
|
119 | 120 |
|
@@ -643,7 +644,7 @@ def export_from_model(
|
643 | 644 | is_encoder_decoder = getattr(getattr(model, "config", {}), "is_encoder_decoder", False)
|
644 | 645 | model_type = getattr(getattr(model, "config", {}), "model_type", "")
|
645 | 646 | stateful = stateful and (
|
646 |
| - ensure_export_task_support_stateful(task, is_encoder_decoder) or ensure_model_type_support_stateful(model_type) |
| 647 | + ensure_export_task_support_stateful(task) or ensure_model_type_support_stateful(model_type) |
647 | 648 | )
|
648 | 649 |
|
649 | 650 | if stateful and is_encoder_decoder and not getattr(model, "_supports_cache_class", False):
|
@@ -1251,17 +1252,16 @@ def _get_encoder_decoder_stateful_models_for_export(
|
1251 | 1252 | all_variants = "\n".join([f" - {name}: {description}" for name, description in export_config.VARIANTS.items()])
|
1252 | 1253 | logger.info(f"Using the export variant {export_config.variant}. Available variants are:\n{all_variants}")
|
1253 | 1254 |
|
1254 |
| - models_for_export = _get_submodels_for_export_encoder_decoder(model, use_past=True) |
| 1255 | + models_for_export = _get_submodels_for_export_encoder_decoder(model, use_past=False) |
1255 | 1256 |
|
1256 | 1257 | encoder_export_config = export_config.with_behavior("encoder")
|
1257 | 1258 | models_for_export[ENCODER_NAME] = (models_for_export[ENCODER_NAME], encoder_export_config)
|
1258 | 1259 |
|
1259 | 1260 | decoder_export_config_with_past = export_config.with_behavior("decoder", use_past=True, use_past_in_inputs=True)
|
1260 | 1261 |
|
1261 | 1262 | decoder_export_config_with_past.stateful = True
|
1262 |
| - decoder_with_past_model = models_for_export.pop(DECODER_WITH_PAST_NAME) |
1263 | 1263 | models_for_export[DECODER_NAME] = (
|
1264 |
| - decoder_with_past_model, |
| 1264 | + models_for_export[DECODER_NAME], |
1265 | 1265 | decoder_export_config_with_past,
|
1266 | 1266 | )
|
1267 | 1267 | return None, models_for_export
|
0 commit comments