
Commit 7d327ed

Merge branch 'main' into support_phi3_export

2 parents 602b9c3 + b97b601

File tree: 6 files changed, +62 -3 lines changed


optimum/exporters/openvino/model_configs.py (+49 -1)
@@ -19,13 +19,14 @@
 from transformers.utils import is_tf_available

 from optimum.exporters.onnx.config import TextDecoderOnnxConfig, TextDecoderWithPositionIdsOnnxConfig
-from optimum.exporters.onnx.model_configs import GemmaOnnxConfig, LlamaOnnxConfig, PhiOnnxConfig
+from optimum.exporters.onnx.model_configs import FalconOnnxConfig, GemmaOnnxConfig, LlamaOnnxConfig, PhiOnnxConfig
 from optimum.exporters.tasks import TasksManager
 from optimum.utils import DEFAULT_DUMMY_SHAPES
 from optimum.utils.input_generators import (
     DummyInputGenerator,
     DummyPastKeyValuesGenerator,
     DummyTextInputGenerator,
+    FalconDummyPastKeyValuesGenerator,
     MistralDummyPastKeyValuesGenerator,
 )
 from optimum.utils.normalized_config import NormalizedTextConfig

@@ -456,3 +457,50 @@ def patch_model_for_export(
         self, model: Union["PreTrainedModel", "TFPreTrainedModel"], model_kwargs: Optional[Dict[str, Any]] = None
     ) -> "ModelPatcher":
         return Phi3ModelPatcher(self, model, model_kwargs=model_kwargs)
+
+
+class OVFalconDummyPastKeyValuesGenerator(FalconDummyPastKeyValuesGenerator):
+    def __init__(
+        self,
+        task: str,
+        normalized_config: NormalizedTextConfig,
+        batch_size: int = DEFAULT_DUMMY_SHAPES["batch_size"],
+        sequence_length: int = DEFAULT_DUMMY_SHAPES["sequence_length"],
+        random_batch_size_range: Optional[Tuple[int, int]] = None,
+        random_sequence_length_range: Optional[Tuple[int, int]] = None,
+        **kwargs,
+    ):
+        super().__init__(
+            task=task,
+            normalized_config=normalized_config,
+            batch_size=batch_size,
+            sequence_length=sequence_length,
+            random_batch_size_range=random_batch_size_range,
+            random_sequence_length_range=random_sequence_length_range,
+            **kwargs,
+        )
+        if normalized_config.new_decoder_architecture:
+            self.num_kv_heads = normalized_config.num_attention_heads
+        else:
+            self.num_kv_heads = normalized_config.num_kv_heads if not normalized_config.multi_query else 1
+
+        self.head_dim = self.hidden_size // self.num_attention_heads
+
+
+@register_in_tasks_manager(
+    "falcon",
+    *[
+        "feature-extraction",
+        "feature-extraction-with-past",
+        "question-answering",
+        "text-generation",
+        "text-generation-with-past",
+        "token-classification",
+    ],
+    library_name="transformers",
+)
+class FalconOpenVINOConfig(FalconOnnxConfig):
+    DUMMY_INPUT_GENERATOR_CLASSES = (
+        OVFalconDummyPastKeyValuesGenerator,
+    ) + TextDecoderOnnxConfig.DUMMY_INPUT_GENERATOR_CLASSES
+    DUMMY_PKV_GENERATOR_CLASS = OVFalconDummyPastKeyValuesGenerator
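The custom generator exists because Falcon checkpoints describe their KV-cache layout with two config flags: new_decoder_architecture (falcon-40b style, one KV head per attention head) and multi_query (falcon-7b style, a single shared KV head). Below is a minimal standalone sketch of that selection rule, mirroring the branch added above; the function name and values are illustrative only and not part of the commit.

def falcon_num_kv_heads(new_decoder_architecture, multi_query, num_attention_heads, num_kv_heads):
    # falcon-40b style: every attention head gets its own KV head
    if new_decoder_architecture:
        return num_attention_heads
    # falcon-7b style: multi-query attention shares a single KV head
    return 1 if multi_query else num_kv_heads

assert falcon_num_kv_heads(True, False, 8, 1) == 8   # 40b-like config
assert falcon_num_kv_heads(False, True, 8, 1) == 1   # 7b-like (multi-query) config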

optimum/intel/generation/modeling.py (+9 -0)
@@ -180,13 +180,22 @@ def _reorder_cache(
         """
         if self.config.model_type == "bloom":
             return self._reorder_cache_bloom(past_key_values, beam_idx)
+        elif self.config.model_type == "gpt_bigcode":
+            return self._reorder_cache_gpt_bigcode(past_key_values, beam_idx)

         # from transformers.models.gpt2.modeling_gpt2.GPT2LMHeadModel._reorder_cache
         return tuple(
             tuple(past_state.index_select(0, beam_idx.to(past_state.device)) for past_state in layer_past)
             for layer_past in past_key_values
         )

+    # Copied from transformers.models.gpt_bigcode.modeling_gpt_bigcode.GPTBigCodeForCausalLM._reorder_cache
+    @staticmethod
+    def _reorder_cache_gpt_bigcode(
+        past_key_values: Tuple[Tuple[torch.Tensor]], beam_idx: torch.Tensor
+    ) -> Tuple[Tuple[torch.Tensor]]:
+        return tuple(layer_past.index_select(0, beam_idx.to(layer_past.device)) for layer_past in past_key_values)
+
     # Copied from transformers.models.bloom.modeling_bloom.BloomForCausalLM._reorder_cache
     def _reorder_cache_bloom(
         self, past_key_values: Tuple[Tuple[torch.Tensor]], beam_idx: torch.Tensor
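The dedicated helper is needed because gpt_bigcode uses multi-query attention and caches key and value fused into a single tensor per layer, rather than the usual (key, value) pair, so beam reordering indexes each layer tensor directly along the batch dimension. A minimal sketch with assumed, illustrative shapes:

import torch

layer_past = torch.randn(4, 10, 128)   # (num_beams, seq_len, fused KV width); shapes illustrative
beam_idx = torch.tensor([2, 2, 0, 1])  # beams selected at this generation step
reordered = layer_past.index_select(0, beam_idx.to(layer_past.device))
assert reordered.shape == layer_past.shape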

tests/generation/test_modeling.py (+1 -1)
@@ -58,7 +58,7 @@ class ModelingIntegrationTest(unittest.TestCase):
         "mistral",
         "llama",
         "llama2",
-        # "gpt_bigcode",
+        "gpt_bigcode",
     )

     GENERATION_LENGTH = 100
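Re-enabling "gpt_bigcode" here matters because the _reorder_cache_gpt_bigcode branch added above is exercised whenever generation runs with beam search. A hedged usage sketch; TSModelForCausalLM and the export=True argument are assumed from this module's API rather than shown in the diff:

from transformers import AutoTokenizer
from optimum.intel.generation.modeling import TSModelForCausalLM

model_id = "hf-internal-testing/tiny-random-GPTBigCodeModel"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = TSModelForCausalLM.from_pretrained(model_id, export=True)
inputs = tokenizer("def hello():", return_tensors="pt")
out = model.generate(**inputs, num_beams=4, max_new_tokens=8)  # beam search triggers cache reordering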

tests/ipex/test_inference.py (+1 -1)
@@ -65,7 +65,7 @@ class IPEXIntegrationTest(unittest.TestCase):
         "gptj",
         "gpt2",
         "gpt_neo",
-        # "gpt_bigcode",
+        "gpt_bigcode",
         "llama",
         "llama2",
         "opt",

tests/openvino/test_modeling.py (+1 -0)
@@ -539,6 +539,7 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase):
         "internlm2",
         "orion",
         "falcon",
+        "falcon-40b",
     )
     GENERATION_LENGTH = 100
     REMOTE_CODE_MODELS = ("chatglm", "minicpm", "baichuan2", "jais", "qwen", "internlm2", "olmo", "orion", "phi3")
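The new "falcon-40b" entry gives the new_decoder_architecture=True export path its own coverage; the existing tiny "falcon" checkpoint presumably exercises the 7b-style layout. A hedged sketch of what the test covers end to end; OVModelForCausalLM with export=True is the standard optimum-intel API, and the model id comes from tests/openvino/utils_tests.py below:

from transformers import AutoTokenizer
from optimum.intel import OVModelForCausalLM

model_id = "katuni4ka/tiny-random-falcon-40b"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = OVModelForCausalLM.from_pretrained(model_id, export=True)  # export goes through FalconOpenVINOConfig
inputs = tokenizer("Hello", return_tensors="pt")
out = model.generate(**inputs, max_new_tokens=5)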

tests/openvino/utils_tests.py (+1 -0)
@@ -44,6 +44,7 @@
     "electra": "hf-internal-testing/tiny-random-electra",
     "gemma": "fxmarty/tiny-random-GemmaForCausalLM",
     "falcon": "fxmarty/really-tiny-falcon-testing",
+    "falcon-40b": "katuni4ka/tiny-random-falcon-40b",
    "flaubert": "hf-internal-testing/tiny-random-flaubert",
     "gpt_bigcode": "hf-internal-testing/tiny-random-GPTBigCodeModel",
     "gpt2": "hf-internal-testing/tiny-random-gpt2",
