Commit 4a1cf6c

Merge branch 'main' into ci
2 parents: af1536f + 4f79e05

File tree

10 files changed: +98, -743 lines


.github/workflows/test_generation.yml

-36 lines: this file was deleted.

examples/neural_compressor/text-generation/README.md

-2 lines changed
@@ -18,8 +18,6 @@ limitations under the License.
 
 Based on the script [`run_generation.py`](https://github.com/huggingface/transformers/blob/main/examples/pytorch/text-generation/run_generation.py).
 
-The original generation task only supported the PyTorch eager model. By calling the `TSModelForCausalLM` class, we can now support a TorchScript model for generation tasks.
-
 This example also allows us to apply different quantization approaches (such as dynamic, static, The example applies post-training static quantization on a gptj model).
 
 Example usage:

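The retained README text above describes applying post-training static quantization to a GPT-J model. As a rough, hedged illustration only (not the example's own `run_generation.py` invocation), a minimal static-quantization sketch with optimum-intel's `INCQuantizer` might look like the following; the checkpoint name, calibration dataset, and save directory are assumptions made for illustration:

    # Hedged sketch only: post-training static quantization via optimum-intel's INCQuantizer.
    # The checkpoint, calibration dataset, and output directory are illustrative assumptions,
    # not values taken from the example's README.
    from transformers import AutoModelForCausalLM, AutoTokenizer
    from neural_compressor.config import PostTrainingQuantConfig
    from optimum.intel import INCQuantizer

    model_id = "EleutherAI/gpt-j-6b"  # assumed GPT-J checkpoint
    model = AutoModelForCausalLM.from_pretrained(model_id)
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    tokenizer.pad_token = tokenizer.eos_token  # GPT-J has no pad token by default

    quantizer = INCQuantizer.from_pretrained(model)
    # Static quantization needs calibration data; build a small set from a public corpus.
    calibration_dataset = quantizer.get_calibration_dataset(
        "wikitext",
        dataset_config_name="wikitext-2-raw-v1",
        preprocess_function=lambda ex: tokenizer(
            ex["text"], padding="max_length", max_length=64, truncation=True
        ),
        num_samples=64,
    )
    quantizer.quantize(
        quantization_config=PostTrainingQuantConfig(approach="static"),
        calibration_dataset=calibration_dataset,
        save_directory="gptj-static-quantized",  # assumed output path
    )

Switching to the dynamic approach would only require `PostTrainingQuantConfig(approach="dynamic")` and no calibration dataset.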
optimum/exporters/openvino/model_patcher.py

+17, -3 lines changed
@@ -3935,14 +3935,28 @@ def __enter__(self):
         # Modified from https://github.com/huggingface/transformers/blob/v4.45.2/src/transformers/models/qwen2_vl/modeling_qwen2_vl.py#L390
         # added attention_mask input instead of internal calculation (unsupported by tracing due to cycle with dynamic len)
         def sdpa_attn_forward(
-            self, hidden_states: torch.Tensor, attention_mask: torch.Tensor, rotary_pos_emb: torch.Tensor = None
+            self,
+            hidden_states: torch.Tensor,
+            attention_mask: torch.Tensor,
+            rotary_pos_emb: torch.Tensor = None,
+            position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
         ) -> torch.Tensor:
             from transformers.models.qwen2_vl.modeling_qwen2_vl import apply_rotary_pos_emb_vision
 
             seq_length = hidden_states.shape[0]
             q, k, v = self.qkv(hidden_states).reshape(seq_length, 3, self.num_heads, -1).permute(1, 0, 2, 3).unbind(0)
-            q = apply_rotary_pos_emb_vision(q.unsqueeze(0), rotary_pos_emb).squeeze(0)
-            k = apply_rotary_pos_emb_vision(k.unsqueeze(0), rotary_pos_emb).squeeze(0)
+
+            if is_transformers_version(">=", "4.49"):
+                if position_embeddings is None:
+                    emb = torch.cat((rotary_pos_emb, rotary_pos_emb), dim=-1)
+                    cos = emb.cos().float()
+                    sin = emb.sin().float()
+                else:
+                    cos, sin = position_embeddings
+                q, k = apply_rotary_pos_emb_vision(q, k, cos, sin)
+            else:
+                q = apply_rotary_pos_emb_vision(q.unsqueeze(0), rotary_pos_emb).squeeze(0)
+                k = apply_rotary_pos_emb_vision(k.unsqueeze(0), rotary_pos_emb).squeeze(0)
 
             q = q.transpose(0, 1)
             k = k.transpose(0, 1)

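The hunk above keeps the patched `sdpa_attn_forward` working on transformers versions both before and after 4.49 by accepting either `rotary_pos_emb` or precomputed `position_embeddings`. As a standalone illustration of what the new (>= 4.49) branch feeds into `apply_rotary_pos_emb_vision`, the sketch below re-derives the math using the standard rotate-half convention; that convention and the broadcast layout are assumptions based on the usual transformers implementation, not the patched code itself:

    # Hedged sketch: rebuild cos/sin from per-position rotary angles and rotate q/k,
    # mirroring the duplicated-angle construction in the new branch above.
    import torch

    def rotate_half(x: torch.Tensor) -> torch.Tensor:
        # Split the last dimension in half and swap the halves with a sign flip.
        x1, x2 = x.chunk(2, dim=-1)
        return torch.cat((-x2, x1), dim=-1)

    def apply_vision_rope(q, k, rotary_pos_emb):
        # Duplicate the per-position angles to cover the full head dimension,
        # take cos/sin once, then rotate q and k (assumed layout: seq, heads, head_dim).
        emb = torch.cat((rotary_pos_emb, rotary_pos_emb), dim=-1)
        cos = emb.cos().float().unsqueeze(-2)  # broadcast over the head axis
        sin = emb.sin().float().unsqueeze(-2)
        q_rot = q * cos + rotate_half(q) * sin
        k_rot = k * cos + rotate_half(k) * sin
        return q_rot.type_as(q), k_rot.type_as(k)

    # Toy shapes: 4 patches, 2 heads, head_dim 8, so rotary_pos_emb holds head_dim // 2 angles.
    q = torch.randn(4, 2, 8)
    k = torch.randn(4, 2, 8)
    rotary_pos_emb = torch.randn(4, 4)
    q_rot, k_rot = apply_vision_rope(q, k, rotary_pos_emb)
    print(q_rot.shape, k_rot.shape)  # torch.Size([4, 2, 8]) torch.Size([4, 2, 8])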
optimum/intel/generation/__init__.py

-16 lines: this file was deleted.
