Skip to content

Commit 8ef3997

Browse files
authored
Disable kv cache compression for fp vlm (#1080)
1 parent cda4908 commit 8ef3997

File tree

2 files changed

+16 -2 lines changed

2 files changed

+16 -2 lines changed

optimum/exporters/openvino/convert.py

+8 -2
Original file line number | Diff line number | Diff line change
@@ -100,9 +100,15 @@ def _set_runtime_options(
100100
for model_name in models_and_export_configs.keys():
101101
_, sub_export_config = models_and_export_configs[model_name]
102102
sub_export_config.runtime_options = {}
103-
if "diffusers" in library_name or "text-generation" in task:
103+
if (
104+
"diffusers" in library_name
105+
or "text-generation" in task
106+
or ("image-text-to-text" in task and model_name == "language_model")
107+
):
104108
sub_export_config.runtime_options["ACTIVATIONS_SCALE_FACTOR"] = "8.0"
105-
if not quantized_model and "text-generation" in task:
109+
if not quantized_model and (
110+
"text-generation" in task or ("image-text-to-text" in task and model_name == "language_model")
111+
):
106112
sub_export_config.runtime_options["KV_CACHE_PRECISION"] = "f16"
107113

108114

tests/openvino/test_export.py

+8
Original file line number | Diff line number | Diff line change
@@ -134,6 +134,14 @@ def _openvino_export(
134134
self.assertTrue(ov_model.model.has_rt_info(["runtime_options", "ACTIVATIONS_SCALE_FACTOR"]))
135135
self.assertTrue(ov_model.model.has_rt_info(["runtime_options", "KV_CACHE_PRECISION"]))
136136

137+
if task == "image-text-to-text":
138+
self.assertTrue(
139+
ov_model.language_model.model.has_rt_info(["runtime_options", "KV_CACHE_PRECISION"])
140+
)
141+
self.assertTrue(
142+
ov_model.language_model.model.has_rt_info(["runtime_options", "ACTIVATIONS_SCALE_FACTOR"])
143+
)
144+
137145
if library_name == "diffusers":
138146
self.assertTrue(
139147
ov_model.vae_encoder.model.has_rt_info(["runtime_options", "ACTIVATIONS_SCALE_FACTOR"])

0 commit comments

Comments (0)