Skip to content

Commit 17debf6

Browse files
committed
set kv cache precision for seq2seq models
1 parent dbf77e4 commit 17debf6

File tree

2 files changed

+2
-6
lines changed

2 files changed

+2
-6
lines changed

optimum/intel/openvino/modeling_base.py

+2
Original file line number | Diff line number | Diff line change
@@ -253,6 +253,8 @@ def _set_ov_config_parameters(self):
253253
q_config = self._openvino_config.quantization_config if self._openvino_config else None
254254
if isinstance(q_config, OVDynamicQuantizationConfig):
255255
self.ov_config["DYNAMIC_QUANTIZATION_GROUP_SIZE"] = str(q_config.activations_group_size)
256+
if self.can_generate() and "KV_CACHE_PRECISION" not in self.ov_config:
257+
self.ov_config["KV_CACHE_PRECISION"] = "u8"
256258

257259
@staticmethod
258260
def _cached_file(

optimum/intel/openvino/modeling_decoder.py

-6
Original file line number | Diff line number | Diff line change
@@ -328,12 +328,6 @@ def _make_stateful(self):
328328
patch_stateful(self.config, self.model)
329329
self.stateful = True
330330

331-
def _set_ov_config_parameters(self):
332-
super()._set_ov_config_parameters()
333-
334-
if "DYNAMIC_QUANTIZATION_GROUP_SIZE" in self.ov_config and "KV_CACHE_PRECISION" not in self.ov_config:
335-
self.ov_config["KV_CACHE_PRECISION"] = "u8"
336-
337331

338332
@add_start_docstrings(
339333
"""

0 commit comments

Comments
 (0)