Commit 4a1cf6c

Merge branch 'main' into ci
2 parents: af1536f + 4f79e05

File tree

10 files changed: +98, -743 lines


.github/workflows/test_generation.yml

-36 lines: this file was deleted.

examples/neural_compressor/text-generation/README.md

-2 lines changed
@@ -18,8 +18,6 @@ limitations under the License.
 
 Based on the script [`run_generation.py`](https://github.com/huggingface/transformers/blob/main/examples/pytorch/text-generation/run_generation.py).
 
-The original generation task only supported the PyTorch eager model. By calling the `TSModelForCausalLM` class, we can now support a TorchScript model for generation tasks.
-
 This example also allows us to apply different quantization approaches (such as dynamic, static, The example applies post-training static quantization on a gptj model).
 
 Example usage:

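The retained README text above describes applying post-training static quantization to a GPT-J model. As a rough, hedged illustration only (not the example's own `run_generation.py` invocation), a minimal static-quantization sketch with optimum-intel's `INCQuantizer` might look like the following; the checkpoint name, calibration dataset, and save directory are assumptions made for illustration:

    # Hedged sketch only: post-training static quantization via optimum-intel's INCQuantizer.
    # The checkpoint, calibration dataset, and output directory are illustrative assumptions,
    # not values taken from the example's README.
    from transformers import AutoModelForCausalLM, AutoTokenizer
    from neural_compressor.config import PostTrainingQuantConfig
    from optimum.intel import INCQuantizer

    model_id = "EleutherAI/gpt-j-6b"  # assumed GPT-J checkpoint
    model = AutoModelForCausalLM.from_pretrained(model_id)
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    tokenizer.pad_token = tokenizer.eos_token  # GPT-J has no pad token by default

    quantizer = INCQuantizer.from_pretrained(model)
    # Static quantization needs calibration data; build a small set from a public corpus.
    calibration_dataset = quantizer.get_calibration_dataset(
        "wikitext",
        dataset_config_name="wikitext-2-raw-v1",
        preprocess_function=lambda ex: tokenizer(
            ex["text"], padding="max_length", max_length=64, truncation=True
        ),
        num_samples=64,
    )
    quantizer.quantize(
        quantization_config=PostTrainingQuantConfig(approach="static"),
        calibration_dataset=calibration_dataset,
        save_directory="gptj-static-quantized",  # assumed output path
    )

Switching to the dynamic approach would only require `PostTrainingQuantConfig(approach="dynamic")` and no calibration dataset.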
optimum/exporters/openvino/model_patcher.py

+17, -3 lines changed
@@ -3935,14 +3935,28 @@ def __enter__(self):
         # Modified from https://github.com/huggingface/transformers/blob/v4.45.2/src/transformers/models/qwen2_vl/modeling_qwen2_vl.py#L390
         # added attention_mask input instead of internal calculation (unsupported by tracing due to cycle with dynamic len)
         def sdpa_attn_forward(
-            self, hidden_states: torch.Tensor, attention_mask: torch.Tensor, rotary_pos_emb: torch.Tensor = None
+            self,
+            hidden_states: torch.Tensor,
+            attention_mask: torch.Tensor,
+            rotary_pos_emb: torch.Tensor = None,
+            position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
         ) -> torch.Tensor:
             from transformers.models.qwen2_vl.modeling_qwen2_vl import apply_rotary_pos_emb_vision
 
             seq_length = hidden_states.shape[0]
             q, k, v = self.qkv(hidden_states).reshape(seq_length, 3, self.num_heads, -1).permute(1, 0, 2, 3).unbind(0)
-            q = apply_rotary_pos_emb_vision(q.unsqueeze(0), rotary_pos_emb).squeeze(0)
-            k = apply_rotary_pos_emb_vision(k.unsqueeze(0), rotary_pos_emb).squeeze(0)
+
+            if is_transformers_version(">=", "4.49"):
+                if position_embeddings is None:
+                    emb = torch.cat((rotary_pos_emb, rotary_pos_emb), dim=-1)
+                    cos = emb.cos().float()
+                    sin = emb.sin().float()
+                else:
+                    cos, sin = position_embeddings
+                q, k = apply_rotary_pos_emb_vision(q, k, cos, sin)
+            else:
+                q = apply_rotary_pos_emb_vision(q.unsqueeze(0), rotary_pos_emb).squeeze(0)
+                k = apply_rotary_pos_emb_vision(k.unsqueeze(0), rotary_pos_emb).squeeze(0)
 
             q = q.transpose(0, 1)
             k = k.transpose(0, 1)

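The hunk above keeps the patched `sdpa_attn_forward` working on transformers versions both before and after 4.49 by accepting either `rotary_pos_emb` or precomputed `position_embeddings`. As a standalone illustration of what the new (>= 4.49) branch feeds into `apply_rotary_pos_emb_vision`, the sketch below re-derives the math using the standard rotate-half convention; that convention and the broadcast layout are assumptions based on the usual transformers implementation, not the patched code itself:

    # Hedged sketch: rebuild cos/sin from per-position rotary angles and rotate q/k,
    # mirroring the duplicated-angle construction in the new branch above.
    import torch

    def rotate_half(x: torch.Tensor) -> torch.Tensor:
        # Split the last dimension in half and swap the halves with a sign flip.
        x1, x2 = x.chunk(2, dim=-1)
        return torch.cat((-x2, x1), dim=-1)

    def apply_vision_rope(q, k, rotary_pos_emb):
        # Duplicate the per-position angles to cover the full head dimension,
        # take cos/sin once, then rotate q and k (assumed layout: seq, heads, head_dim).
        emb = torch.cat((rotary_pos_emb, rotary_pos_emb), dim=-1)
        cos = emb.cos().float().unsqueeze(-2)  # broadcast over the head axis
        sin = emb.sin().float().unsqueeze(-2)
        q_rot = q * cos + rotate_half(q) * sin
        k_rot = k * cos + rotate_half(k) * sin
        return q_rot.type_as(q), k_rot.type_as(k)

    # Toy shapes: 4 patches, 2 heads, head_dim 8, so rotary_pos_emb holds head_dim // 2 angles.
    q = torch.randn(4, 2, 8)
    k = torch.randn(4, 2, 8)
    rotary_pos_emb = torch.randn(4, 4)
    q_rot, k_rot = apply_vision_rope(q, k, rotary_pos_emb)
    print(q_rot.shape, k_rot.shape)  # torch.Size([4, 2, 8]) torch.Size([4, 2, 8])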
optimum/intel/generation/__init__.py

-16 lines: this file was deleted.
