huggingface · echarlaix · Feb 21, 2024 · Feb 19, 2024 · Feb 19, 2024 · Feb 20, 2024
diff --git a/optimum/intel/openvino/modeling_decoder.py b/optimum/intel/openvino/modeling_decoder.py
@@ -419,7 +419,8 @@ def prepare_inputs(
             # past_key_values are not used explicitly, instead they are handled inside the model
             if past_key_values is None:
                 # This is the first iteration in a sequence, reset all states
-                self.request.reset_state()
+                if self.request is not None:
+                    self.request.reset_state()
                 # Set initial value for the next beam_idx input that will be used at the current iteration
                 # and will be optionally updated by _reorder_cache at the next iterations if beam_search is used
                 self.next_beam_idx = np.arange(batch_size, dtype=int)
@@ -592,7 +593,10 @@ def _from_pretrained(
         else:
             init_cls = cls
 
-        causal_model = init_cls(model=model, config=config, model_save_dir=model_cache_path.parent, **kwargs)
+        enable_compilation = kwargs.pop("compile", True) and not load_in_4bit
+        causal_model = init_cls(
+            model=model, config=config, model_save_dir=model_cache_path.parent, compile=enable_compilation, **kwargs
+        )
 
         if load_in_4bit:
             if not is_nncf_available():