Skip to content

Commit 18ba0bd

Browse files
[OV]: Fixed inference after 4 bit weight compression (#569)
* [OV]: Fixed inference after 4 bit weight compression * Fixed issue * Update optimum/intel/openvino/modeling_decoder.py Co-authored-by: Ella Charlaix <80481427+echarlaix@users.noreply.github.com> * Applied comments * Fixed issue when request is None --------- Co-authored-by: Ella Charlaix <80481427+echarlaix@users.noreply.github.com>
1 parent 8f7d016 commit 18ba0bd

File tree

1 file changed

+6
-2
lines changed

1 file changed

+6
-2
lines changed

optimum/intel/openvino/modeling_decoder.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -419,7 +419,8 @@ def prepare_inputs(
419419
# past_key_values are not used explicitly, instead they are handled inside the model
420420
if past_key_values is None:
421421
# This is the first iteration in a sequence, reset all states
422-
self.request.reset_state()
422+
if self.request is not None:
423+
self.request.reset_state()
423424
# Set initial value for the next beam_idx input that will be used at the current iteration
424425
# and will be optionally updated by _reorder_cache at the next iterations if beam_search is used
425426
self.next_beam_idx = np.arange(batch_size, dtype=int)
@@ -592,7 +593,10 @@ def _from_pretrained(
592593
else:
593594
init_cls = cls
594595

595-
causal_model = init_cls(model=model, config=config, model_save_dir=model_cache_path.parent, **kwargs)
596+
enable_compilation = kwargs.pop("compile", True) and not load_in_4bit
597+
causal_model = init_cls(
598+
model=model, config=config, model_save_dir=model_cache_path.parent, compile=enable_compilation, **kwargs
599+
)
596600

597601
if load_in_4bit:
598602
if not is_nncf_available():

0 commit comments

Comments (0)