
Commit 01c8117

hot fix for weights compression

1 parent 72b0630

2 files changed, +25 −4 lines changed

optimum/intel/openvino/modeling_decoder.py (+4 −4)
@@ -261,10 +261,10 @@ def _from_transformers(
             task = task + "-with-past"

         # If load_in_8bit or quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
-        if load_in_8bit is None or not quantization_config:
-            ov_config = None
+        if load_in_8bit is None and not quantization_config:
+            ov_export_config = None
         else:
-            ov_config = OVConfig(dtype="fp32")
+            ov_export_config = OVConfig(dtype="fp32")

         stateful = kwargs.pop("stateful", ensure_stateful_is_available(warn=False) and use_cache)

@@ -279,7 +279,7 @@ def _from_transformers(
             local_files_only=local_files_only,
             force_download=force_download,
             trust_remote_code=trust_remote_code,
-            ov_config=ov_config,
+            ov_config=ov_export_config,
             stateful=stateful,
         )
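
The substance of the hot fix is the switch from "or" to "and" in the condition above. Under the old condition, passing a quantization_config while leaving load_in_8bit unset still yielded ov_config = None, so the exporter would apply its own size-based default weight compression before the requested quantization ever ran. The rename to ov_export_config separates the export-time configuration from the quantization settings applied afterwards. A minimal sketch of the corrected decision logic follows; the helper name resolve_export_dtype and its string return values are illustrative, not optimum-intel API:

from typing import Any, Optional

def resolve_export_dtype(load_in_8bit: Optional[bool], quantization_config: Any) -> Optional[str]:
    # Hypothetical helper mirroring the fixed condition in _from_transformers.
    if load_in_8bit is None and not quantization_config:
        # Neither option given: defer to the exporter, which picks a
        # default based on model size (and may compress the weights).
        return None
    # An explicit request was made: export uncompressed fp32 weights and
    # apply the requested compression in a later step.
    return "fp32"

# With the old "or", the second call would have returned None, letting the
# exporter compress the weights before the 4-bit config could be applied.
assert resolve_export_dtype(None, None) is None
assert resolve_export_dtype(None, {"bits": 4}) == "fp32"
assert resolve_export_dtype(False, None) == "fp32"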

tests/openvino/test_quantization.py (+21)
@@ -490,6 +490,27 @@ def test_ovmodel_load_large_model_with_uncompressed_weights(self):
                     }
                     save_model_patch.aasert_called_with(saving_params)

+    def test_ovmodel_load_large_model_with_additional_quantization_config(self):
+        with unittest.mock.patch("transformers.modeling_utils.ModuleUtilsMixin") as model_mixin_patch:
+            model_mixin_patch.num_parameters.return_value = 2e9
+            with unittest.mock.patch("openvino.runtime.ie_api.Core.read_model") as core_patch:
+                with unittest.mock.patch("optimum.exporters.openvino.convert._save_model") as save_model_patch:
+                    _ = OVModelForCausalLM.from_pretrained(
+                        MODEL_NAMES["llama"],
+                        export=True,
+                        compile=False,
+                        use_cache=False,
+                        quantization_config=OVWeightQuantizationConfig(bits=4, sym=True, group_size=-1, ratio=0.8),
+                    )
+                    # quantization will be performed later, using load_model
+                    saving_params = {
+                        "model": unittest.mock.ANY,
+                        "path": unittest.mock.ANY,
+                        "compression_option": "fp32",
+                        "compression_ratio": None,
+                    }
+                    save_model_patch.aasert_called_with(saving_params)
+

 class OVQuantizerQATest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES = (("hf-internal-testing/tiny-random-BertForQuestionAnswering",),)
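
The new test simulates a large model by patching ModuleUtilsMixin so that num_parameters reports 2e9, and patches _save_model to check that, even with an OVWeightQuantizationConfig supplied, the export is expected to write fp32 weights with no compression ratio; the 4-bit quantization is deferred to load time rather than pre-empted by the exporter's size-based default. Note that aasert_called_with (copied from the neighboring test) appears to be a typo for assert_called_with; on a Mock it resolves to an auto-created no-op attribute, so the final check never actually asserts. Below is a hedged sketch of the user-facing scenario the test covers, assuming OVWeightQuantizationConfig is importable from optimum.intel as in this version of the library (the model id is illustrative):

from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig

# Illustrative model id; any multi-billion-parameter causal LM exported
# through optimum-intel takes the same path.
model = OVModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf",
    export=True,
    quantization_config=OVWeightQuantizationConfig(bits=4, sym=True, group_size=-1, ratio=0.8),
)
# With this fix, the export writes fp32 weights, and the symmetric 4-bit
# compression (covering a 0.8 fraction of the weights via ratio=0.8,
# per-channel via group_size=-1) is applied when the exported model is loaded.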
