@@ -291,11 +291,9 @@ def __exit__(self, exc_type, exc_value, traceback):
 # adopted from
 # https://github.com/huggingface/transformers/blob/v4.39.3/src/transformers/models/gemma/modeling_gemma.py#L965
 # https://github.com/huggingface/transformers/blob/v4.39.3/src/transformers/models/llama/modeling_llama.py#L1058
-def _llama_gemma_update_causal_mask(self, attention_mask, input_tensor, cache_position, **kwargs):
+def _llama_gemma_update_causal_mask(self, attention_mask, input_tensor, cache_position, past_seen_tokens=None):
     from transformers.modeling_attn_mask_utils import AttentionMaskConverter

-    # for compatibility with https://github.com/huggingface/transformers/pull/30047
-    current_length = kwargs.get("current_length", cache_position[-1])
     dtype, device = input_tensor.dtype, input_tensor.device

     # using minimum from dtype with larger bandwith (floa32) may lead to overflow
@@ -305,7 +303,14 @@ def _llama_gemma_update_causal_mask(self, attention_mask, input_tensor, cache_po
     if hasattr(getattr(self.layers[0], "self_attn", {}), "past_key_value"):  # static cache
         target_length = self.config.max_position_embeddings
     else:  # dynamic cache
-        target_length = attention_mask.shape[-1] if isinstance(attention_mask, torch.Tensor) else current_length + 1
+        if past_seen_tokens is not None:
+            current_length = past_seen_tokens + sequence_length + 1
+        # TODO: remove after support of transformers >= v4.40.0
+        else:
+            current_length = cache_position[-1] + 1
+
+        target_length = attention_mask.shape[-1] if isinstance(attention_mask, torch.Tensor) else current_length
+

     causal_mask = torch.full((sequence_length, target_length), fill_value=1, dtype=dtype, device=device) * min_dtype
     if sequence_length != 1:
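For readers following the change, here is a minimal standalone sketch of the dynamic-cache branch after this patch. The helper name is illustrative and not part of the patched file, and `sequence_length` is assumed to be the current input length computed elsewhere in the surrounding function:

```python
import torch


def _dynamic_cache_target_length(attention_mask, cache_position, sequence_length, past_seen_tokens=None):
    """Illustrative re-statement of the dynamic-cache branch above (not the patched function itself)."""
    if past_seen_tokens is not None:
        # newer transformers passes the number of already-cached tokens explicitly
        current_length = past_seen_tokens + sequence_length + 1
    else:
        # older transformers: fall back to the last cache position
        current_length = cache_position[-1] + 1

    # a 2D attention mask already encodes the full (past + current) length
    return attention_mask.shape[-1] if isinstance(attention_mask, torch.Tensor) else current_length


# hypothetical values: 5 cached tokens, a 3-token input, no explicit attention mask
print(_dynamic_cache_target_length(None, torch.arange(5, 8), sequence_length=3, past_seen_tokens=5))  # 9
print(_dynamic_cache_target_length(None, torch.arange(5, 8), sequence_length=3))  # tensor(8)
```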