
Commit 9061322

Merge branch 'main' into bump-release
2 parents 76ae2db + 7a929e8

2 files changed: +16 -16

optimum/intel/openvino/modeling_decoder.py (+10 -16)
@@ -28,10 +28,10 @@
 from transformers import AutoModelForCausalLM, PretrainedConfig
 from transformers.file_utils import add_start_docstrings, add_start_docstrings_to_model_forward
 from transformers.generation import GenerationMixin
-from transformers.generation.configuration_utils import GenerationConfig, GenerationMode
+from transformers.generation.configuration_utils import GenerationConfig
 from transformers.generation.logits_process import LogitsProcessorList
 from transformers.generation.stopping_criteria import StoppingCriteriaList
-from transformers.generation.utils import GenerateOutput
+from transformers.generation.utils import GenerateOutput, GenerationMode
 from transformers.modeling_outputs import CausalLMOutputWithPast
 
 from optimum.utils.normalized_config import NormalizedConfigManager
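Note: this hunk moves the GenerationMode import from transformers.generation.configuration_utils to transformers.generation.utils. Where a codebase has to span transformers releases that expose the symbol from different modules, a guarded import is a common pattern; a minimal sketch, assuming only the two locations that appear in this diff:

    # Try the location used after this commit first, then fall back.
    try:
        from transformers.generation.utils import GenerationMode
    except ImportError:
        from transformers.generation.configuration_utils import GenerationMode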
@@ -386,10 +386,8 @@ def prepare_inputs(
         inputs = {}
         if not self.stateful:
             if past_key_values is not None:
-                if (
-                    self.config.model_type not in MULTI_QUERY_ATTN_MODELS
-                    or self.config.model_type == "falcon"
-                    and self.config.new_decoder_architecture
+                if self.config.model_type not in MULTI_QUERY_ATTN_MODELS or (
+                    self.config.model_type == "falcon" and self.config.new_decoder_architecture
                 ):
                     if self._pkv_precision == Type.bf16:
                         # numpy does not support bf16, pretending f16, should change to bf16
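Note: this condition rewrite (repeated in the two hunks below) is cosmetic rather than behavioral: Python gives `and` higher precedence than `or`, so the old unparenthesized form already grouped as `A or (B and C)`. A standalone check, illustrative only:

    from itertools import product

    # `and` binds tighter than `or`, so both spellings agree on all inputs.
    for a, b, c in product((True, False), repeat=3):
        assert (a or b and c) == (a or (b and c))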
@@ -499,10 +497,8 @@ def forward(
         if self.use_cache:
             # Tuple of length equal to : number of layer * number of past_key_value per decoder layer (2 corresponds to the self-attention layer)
             past_key_values = tuple(self.request.get_tensor(key).data for key in self.key_value_output_names)
-            if (
-                self.config.model_type not in MULTI_QUERY_ATTN_MODELS
-                or self.config.model_type == "falcon"
-                and self.config.new_decoder_architecture
+            if self.config.model_type not in MULTI_QUERY_ATTN_MODELS or (
+                self.config.model_type == "falcon" and self.config.new_decoder_architecture
             ):
                 # Tuple of tuple of length `n_layers`, with each tuple of length equal to 2 (k/v of self-attention)
                 past_key_values = tuple(
@@ -559,10 +555,8 @@ def _expand_outputs_for_generation(self, indicies, logits: torch.Tensor, past_ke
         if indicies.shape[0] != 1:
             logits = logits[indicies]
             if past_key_values and not self.stateful:
-                if (
-                    self.config.model_type not in MULTI_QUERY_ATTN_MODELS
-                    or self.config.model_type == "falcon"
-                    and self.config.new_decoder_architecture
+                if self.config.model_type not in MULTI_QUERY_ATTN_MODELS or (
+                    self.config.model_type == "falcon" and self.config.new_decoder_architecture
                 ):
                     past_key_values = tuple(
                         tuple(
@@ -581,7 +575,7 @@ def _expand_outputs_for_generation(self, indicies, logits: torch.Tensor, past_ke
                     if self.next_beam_idx is not None
                     else np.arange(batch_size, dtype=int)[indicies]
                 )
-            self._second_iter_beam_search = True
+                self._second_iter_beam_search = True
         return logits, past_key_values
 
     def _deduplicate_inputs(self, model_inputs: Dict):
@@ -692,7 +686,7 @@ def _reorder_cache(
             self._second_iter_beam_search = False
             return past_key_values
         else:
-            if self.config.model_type not in MULTI_QUERY_ATTN_MODELS and not (
+            if self.config.model_type not in MULTI_QUERY_ATTN_MODELS or (
                 self.config.model_type == "falcon" and self.config.new_decoder_architecture
             ):
                 return tuple(
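Note: unlike the parenthesization-only hunks above, this one changes behavior: the old `and not (...)` kept falcon models with new_decoder_architecture out of this reorder branch, while the new `or (...)` routes them into it, consistent with the other three call sites. A toy check of the two predicates; MULTI_QUERY_ATTN_MODELS is assumed here to contain "falcon", standing in for the real set defined in optimum-intel:

    # Hypothetical stand-in for optimum-intel's MULTI_QUERY_ATTN_MODELS.
    MULTI_QUERY_ATTN_MODELS = {"falcon", "gpt_bigcode"}

    model_type, new_decoder_architecture = "falcon", True
    old = model_type not in MULTI_QUERY_ATTN_MODELS and not (
        model_type == "falcon" and new_decoder_architecture
    )
    new = model_type not in MULTI_QUERY_ATTN_MODELS or (
        model_type == "falcon" and new_decoder_architecture
    )
    assert old is False and new is True  # the branch is now taken for such models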

tests/openvino/test_modeling.py (+6 -0)
@@ -812,6 +812,10 @@ def test_beam_search(self, model_arch):
             return
 
         tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=model_arch in self.REMOTE_CODE_MODELS)
+        if model_arch == "persimmon":
+            tokenizer.pad_token_id = tokenizer.bos_token_id
+            tokenizer.eos_token_id = tokenizer.bos_token_id
+
         beam_search_gen_config = GenerationConfig(
             max_new_tokens=10,
             min_new_tokens=10,
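Note: generate() needs a valid pad token to batch beam-search candidates, and the persimmon tokenizer evidently lacks usable pad/eos ids, so the test pins both to the bos id. A more generic fallback along the same lines, a sketch that is not part of this commit:

    # Hypothetical helper: reuse an existing special token for padding.
    def ensure_pad_token(tokenizer):
        if tokenizer.pad_token_id is None:
            tokenizer.pad_token_id = tokenizer.eos_token_id or tokenizer.bos_token_id
        return tokenizer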
@@ -872,6 +876,8 @@ def test_beam_search(self, model_arch):
         transformers_model.config.eos_token_id = None
 
         for gen_config in gen_configs:
+            if gen_config.do_sample and model_arch in ["baichuan2-13b", "olmo"]:
+                continue
             transformers_outputs = transformers_model.generate(**tokens, generation_config=gen_config)
             ov_stateful_outputs = ov_model_stateful.generate(**tokens, generation_config=gen_config)
             self.assertTrue(torch.allclose(ov_stateful_outputs, transformers_outputs))
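Note: sampling configs are skipped for baichuan2-13b and olmo, presumably because do_sample=True makes the token-exact comparison flaky: the PyTorch and OpenVINO runs only produce identical draws when their RNG streams match. A minimal illustration of that seed sensitivity, using plain torch and unrelated to these models:

    import torch

    probs = torch.tensor([0.5, 0.5])
    torch.manual_seed(0)
    first = torch.multinomial(probs, num_samples=1)
    torch.manual_seed(0)
    second = torch.multinomial(probs, num_samples=1)
    assert torch.equal(first, second)  # identical seeds -> identical draws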
