Skip to content

Commit 1e519c7

Browse files
committed
Revert explicit definition of U8 KV-cache
1 parent 35cf1d2 commit 1e519c7

File tree

1 file changed

+0
-3
lines changed

1 file changed

+0
-3
lines changed

optimum/exporters/openvino/__main__.py

-3
Original file line number | Diff line number | Diff line change
@@ -474,9 +474,6 @@ class StoreAttr(object):
 from optimum.intel.openvino.quantization import _weight_only_quantization
 
 _weight_only_quantization(submodel, quantization_config)
-if "text-generation" in task:
-    submodel.set_rt_info("u8", ["runtime_options", "KV_CACHE_PRECISION"])
-
 compressed_submodel_path = submodel_path.parent / f"{submodel_path.stem}_compressed.xml"
 save_model(submodel, compressed_submodel_path, compress_to_fp16=False)
 del submodel

0 commit comments

Comments
 (0)