@@ -99,11 +99,15 @@ def _set_runtime_options(
99
99
],
100
100
task : str ,
101
101
library_name : str ,
102
+ quantized_model : bool ,
102
103
):
103
104
for model_name in models_and_export_configs .keys ():
104
105
_ , sub_export_config = models_and_export_configs [model_name ]
106
+ sub_export_config .runtime_options = {}
105
107
if "diffusers" in library_name or "text-generation" in task :
106
- sub_export_config .runtime_options = {"ACTIVATIONS_SCALE_FACTOR" : "8.0" }
108
+ sub_export_config .runtime_options ["ACTIVATIONS_SCALE_FACTOR" ] = "8.0"
109
+ if not quantized_model and "text-generation" in task :
110
+ sub_export_config .runtime_options ["KV_CACHE_PRECISION" ] = "f16"
107
111
108
112
109
113
def _save_model (
@@ -116,8 +120,8 @@ def _save_model(
116
120
compress_to_fp16 = ov_config is not None and ov_config .dtype == "fp16"
117
121
model = _add_version_info_to_model (model , library_name )
118
122
119
- if hasattr (config , "runtime_options" ):
120
- model = _add_runtime_options_to_rt_info (model , config . runtime_options )
123
+ runtime_options = config . runtime_options if hasattr (config , "runtime_options" ) else {}
124
+ model = _add_runtime_options_to_rt_info (model , runtime_options )
121
125
save_model (model , path , compress_to_fp16 )
122
126
del model
123
127
gc .collect ()
@@ -755,7 +759,12 @@ def export_from_model(
755
759
756
760
model .save_config (output )
757
761
758
- _set_runtime_options (models_and_export_configs , task , library_name )
762
+ _set_runtime_options (
763
+ models_and_export_configs ,
764
+ task ,
765
+ library_name ,
766
+ hasattr (ov_config , "quantization_config" ) and ov_config .quantization_config ,
767
+ )
759
768
760
769
export_models (
761
770
models_and_export_configs = models_and_export_configs ,
0 commit comments