Skip to content

Commit b04b435

Browse files
committed
add comments for ipex_rope and ipex_scale_dot_product
1 parent 4c1c636 commit b04b435

File tree

1 file changed

+3
-0
lines changed

1 file changed

+3
-0
lines changed

optimum/exporters/ipex/llama_functions.py

+3
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ def llama_attn_forward(
3434
query = query.view(bsz, q_len, self.num_heads, self.head_dim)
3535
key = key.view(bsz, q_len, self.num_key_value_heads, self.head_dim)
3636
value = value.view(bsz, q_len, self.num_key_value_heads, self.head_dim)
37+
# Use the ipex op to apply rotary position embedding more efficiently.
3738
key = self.ipex_rope(
3839
key,
3940
position_ids,
@@ -54,6 +55,8 @@ def llama_attn_forward(
5455
)
5556

5657
if use_cache:
58+
# This ipex op pre-allocates buffers for past_key_values and uses the beam index history
59+
# to decide which beam should be used, making scaled dot-product attention more efficient.
5760
(attn_output, attn_weights, past_key_value) = self.ipex_scale_dot_product(
5861
query,
5962
key,

0 commit comments

Comments
 (0)