Commit 9f072b2

Merge branch 'main' into ea/mpt_sdpa

2 parents: e60872c + 33fc7b7

24 files changed: +108 -43 lines

.github/workflows/test_openvino_examples.yml (+10 -10)

@@ -7,11 +7,11 @@ on:
   push:
     paths:
       - '.github/workflows/test_openvino_examples.yml'
-      - 'examples/openvino/*'
+      - 'examples/openvino/**'
   pull_request:
     paths:
       - '.github/workflows/test_openvino_examples.yml'
-      - 'examples/openvino/*'
+      - 'examples/openvino/**'
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
@@ -22,9 +22,9 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.8", "3.10"]
+        python-version: ["3.8", "3.11"]
 
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04
 
     steps:
       - uses: actions/checkout@v2
@@ -35,12 +35,12 @@ jobs:
 
       - name: Install dependencies
         run: |
-          pip install optimum[openvino] jstyleson nncf pytest
-          pip install -r examples/openvino/audio-classification/requirements.txt
-          pip install -r examples/openvino/image-classification/requirements.txt
-          pip install -r examples/openvino/question-answering/requirements.txt
-          pip install -r examples/openvino/text-classification/requirements.txt
+          pip install .[openvino] jstyleson pytest
+          pip install -r examples/openvino/audio-classification/requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu
+          pip install -r examples/openvino/image-classification/requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu
+          pip install -r examples/openvino/question-answering/requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu
+          pip install -r examples/openvino/text-classification/requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu
 
       - name: Test examples
         run: |
-          python -m pytest examples/openvino/test_examples.py
+          python -m pytest examples/openvino/test_examples.py

.github/workflows/test_openvino_notebooks.yml (+2 -2)

@@ -23,9 +23,9 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.8", "3.10"]
+        python-version: ["3.8", "3.11"]
 
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04
 
     steps:
       - uses: actions/checkout@v2
examples/openvino/audio-classification/requirements.txt (+2 -1)

@@ -1,4 +1,5 @@
 datasets>=1.14.0
 evaluate
 librosa
-torchaudio
+torchaudio
+accelerate

examples/openvino/audio-classification/run_audio_classification.py (+1 -1)

@@ -35,7 +35,7 @@
 from transformers.utils import check_min_version, send_example_telemetry
 from transformers.utils.versions import require_version
 
-from optimum.intel.openvino import OVConfig, OVTrainer, OVTrainingArguments
+from optimum.intel import OVConfig, OVTrainer, OVTrainingArguments
 
 
 logger = logging.getLogger(__name__)
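
The same one-line change recurs across the example scripts and notebooks in this commit: the OpenVINO classes are imported from the top-level optimum.intel namespace instead of the optimum.intel.openvino submodule. A minimal before/after sketch, using the class names from the diff above:

# Old import path, via the submodule:
# from optimum.intel.openvino import OVConfig, OVTrainer, OVTrainingArguments

# New import path used throughout this commit, via the package root:
from optimum.intel import OVConfig, OVTrainer, OVTrainingArguments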

examples/openvino/image-classification/requirements.txt (+1)

@@ -2,3 +2,4 @@ datasets >= 1.8.0
 torch >= 1.9.0
 torchvision>=0.6.0
 evaluate
+accelerate

examples/openvino/image-classification/run_image_classification.py (+1 -1)

@@ -52,7 +52,7 @@
 from transformers.utils import check_min_version, send_example_telemetry
 from transformers.utils.versions import require_version
 
-from optimum.intel.openvino import OVConfig, OVTrainer, OVTrainingArguments
+from optimum.intel import OVConfig, OVTrainer, OVTrainingArguments
 
 
 logger = logging.getLogger(__name__)
examples/openvino/question-answering/requirements.txt (+1)

@@ -1,3 +1,4 @@
 datasets >= 1.8.0
 torch >= 1.9.0
 evaluate
+accelerate

examples/openvino/question-answering/run_qa.py (+1 -1)

@@ -49,7 +49,7 @@
 from transformers.utils.versions import require_version
 from utils_qa import postprocess_qa_predictions
 
-from optimum.intel.openvino import OVConfig, OVTrainingArguments
+from optimum.intel import OVConfig, OVTrainingArguments
 
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.

examples/openvino/question-answering/trainer_qa.py (+1 -1)

@@ -20,7 +20,7 @@
 import torch.nn.functional as F
 from transformers.trainer_utils import PredictionOutput
 
-from optimum.intel.openvino.trainer import OVTrainer
+from optimum.intel import OVTrainer
 
 
 class QuestionAnsweringOVTrainer(OVTrainer):

examples/openvino/text-classification/requirements.txt (+2 -1)

@@ -4,4 +4,5 @@ scipy
 scikit-learn
 protobuf
 torch >= 1.3
-evaluate
+evaluate
+accelerate

examples/openvino/text-classification/run_glue.py (+1 -1)

@@ -46,7 +46,7 @@
 from transformers.utils import check_min_version, send_example_telemetry
 from transformers.utils.versions import require_version
 
-from optimum.intel.openvino import OVConfig, OVTrainer, OVTrainingArguments
+from optimum.intel import OVConfig, OVTrainer, OVTrainingArguments
 
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.

notebooks/openvino/optimum_openvino_inference.ipynb (+12 -12)

@@ -76,7 +76,7 @@
 }
 ],
 "source": [
-"from optimum.intel.openvino import OVModelForQuestionAnswering\n",
+"from optimum.intel import OVModelForQuestionAnswering\n",
 "\n",
 "# Load PyTorch model from the Hub and export to OpenVINO in the background\n",
 "model = OVModelForQuestionAnswering.from_pretrained(\"distilbert-base-uncased-distilled-squad\", export=True)\n",
@@ -182,7 +182,7 @@
 }
 ],
 "source": [
-"from optimum.intel.openvino import OVModelForQuestionAnswering\n",
+"from optimum.intel import OVModelForQuestionAnswering\n",
 "from transformers import AutoTokenizer, pipeline\n",
 "\n",
 "model = OVModelForQuestionAnswering.from_pretrained(\"distilbert-base-uncased-distilled-squad-ov-fp32\")\n",
@@ -240,7 +240,7 @@
 ],
 "source": [
 "import torch\n",
-"from optimum.intel.openvino import OVModelForQuestionAnswering\n",
+"from optimum.intel import OVModelForQuestionAnswering\n",
 "from transformers import AutoTokenizer, pipeline\n",
 "\n",
 "model = OVModelForQuestionAnswering.from_pretrained(\"distilbert-base-uncased-distilled-squad-ov-fp32\")\n",
@@ -324,7 +324,7 @@
 }
 ],
 "source": [
-"from optimum.intel.openvino import OVModelForQuestionAnswering\n",
+"from optimum.intel import OVModelForQuestionAnswering\n",
 "from transformers import AutoTokenizer, pipeline\n",
 "\n",
 "model = OVModelForQuestionAnswering.from_pretrained(\n",
@@ -529,7 +529,7 @@
 ],
 "source": [
 "from IPython.display import Audio\n",
-"from optimum.intel.openvino import OVModelForAudioClassification\n",
+"from optimum.intel import OVModelForAudioClassification\n",
 "from transformers import AutoFeatureExtractor, pipeline\n",
 "from datasets import load_dataset\n",
 "\n",
@@ -638,7 +638,7 @@
 }
 ],
 "source": [
-"from optimum.intel.openvino import OVModelForCausalLM\n",
+"from optimum.intel import OVModelForCausalLM\n",
 "from transformers import AutoTokenizer, pipeline\n",
 "\n",
 "model_id = \"helenai/gpt2-ov\"\n",
@@ -704,7 +704,7 @@
 ],
 "source": [
 "from IPython.display import Image\n",
-"from optimum.intel.openvino import OVModelForImageClassification\n",
+"from optimum.intel import OVModelForImageClassification\n",
 "from transformers import AutoImageProcessor, pipeline\n",
 "\n",
 "model_id = \"helenai/microsoft-swin-tiny-patch4-window7-224-ov\"\n",
@@ -766,7 +766,7 @@
 }
 ],
 "source": [
-"from optimum.intel.openvino import OVModelForMaskedLM\n",
+"from optimum.intel import OVModelForMaskedLM\n",
 "from transformers import AutoTokenizer, pipeline\n",
 "\n",
 "model_id = \"helenai/bert-base-uncased-ov\"\n",
@@ -835,7 +835,7 @@
 }
 ],
 "source": [
-"from optimum.intel.openvino import OVModelForQuestionAnswering\n",
+"from optimum.intel import OVModelForQuestionAnswering\n",
 "from transformers import AutoTokenizer, pipeline\n",
 "\n",
 "# Load the model and tokenizer saved in Part 1 of this notebook. Or use the line below to load them from the hub\n",
@@ -890,7 +890,7 @@
 }
 ],
 "source": [
-"from optimum.intel.openvino import OVModelForSeq2SeqLM\n",
+"from optimum.intel import OVModelForSeq2SeqLM\n",
 "from transformers import AutoTokenizer, pipeline\n",
 "\n",
 "model_id = \"helenai/t5-small-ov\"\n",
@@ -998,7 +998,7 @@
 }
 ],
 "source": [
-"from optimum.intel.openvino import OVModelForSequenceClassification\n",
+"from optimum.intel import OVModelForSequenceClassification\n",
 "from transformers import AutoTokenizer, pipeline\n",
 "\n",
 "model_id = \"helenai/papluca-xlm-roberta-base-language-detection-ov\"\n",
@@ -1047,7 +1047,7 @@
 }
 ],
 "source": [
-"from optimum.intel.openvino import OVModelForTokenClassification\n",
+"from optimum.intel import OVModelForTokenClassification\n",
 "from transformers import AutoTokenizer, pipeline\n",
 "\n",
 "model_id = \"helenai/dslim-bert-base-NER-ov-fp32\"\n",

notebooks/openvino/question_answering_quantization.ipynb (+2 -2)

@@ -51,7 +51,7 @@
 "import transformers\n",
 "from evaluate import evaluator\n",
 "from openvino.runtime import Core\n",
-"from optimum.intel.openvino import OVModelForQuestionAnswering, OVQuantizer, OVQuantizationConfig, OVConfig\n",
+"from optimum.intel import OVModelForQuestionAnswering, OVQuantizer, OVQuantizationConfig, OVConfig\n",
 "from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline\n",
 "\n",
 "transformers.logging.set_verbosity_error()\n",
@@ -286,7 +286,7 @@
 "**NOTE:** if you notice very low accuracy after post-training quantization, it is likely caused by an overflow issue which affects processors that do not contain VNNI (Vector Neural Network Instruction). NNCF has an `overflow_fix` option to address this. It will effectively use 7-bits for quantizing instead of 8-bits to prevent the overflow. To use this option, modify the code in the next cell to add an explicit quantization configuration, and set `overflow_fix` to `\"enable\"`:\n",
 "\n",
 "```\n",
-"from optimum.intel.openvino import OVConfig, OVQuantizationConfig\n",
+"from optimum.intel import OVConfig, OVQuantizationConfig\n",
 "\n",
 "ov_config = OVConfig(quantization_config=OVQuantizationConfig(overflow_fix=\"enable\"))\n",
 "quantizer = OVQuantizer.from_pretrained(model)\n",

notebooks/openvino/requirements.txt (+1 -1)

@@ -1,4 +1,4 @@
-optimum-intel[openvino, nncf]
+optimum-intel[openvino]
 datasets
 evaluate[evaluator]
 ipywidgets

notebooks/openvino/stable_diffusion_optimization.ipynb (+1 -1)

@@ -14,7 +14,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from optimum.intel.openvino import OVStableDiffusionPipeline\n",
+"from optimum.intel import OVStableDiffusionPipeline\n",
 "from diffusers.training_utils import set_seed\n",
 "from IPython.display import display"
 ]

optimum/exporters/openvino/model_configs.py (+49 -1)

@@ -19,13 +19,14 @@
 from transformers.utils import is_tf_available
 
 from optimum.exporters.onnx.config import TextDecoderOnnxConfig, TextDecoderWithPositionIdsOnnxConfig
-from optimum.exporters.onnx.model_configs import GemmaOnnxConfig, LlamaOnnxConfig, MPTOnnxConfig
+from optimum.exporters.onnx.model_configs import FalconOnnxConfig, GemmaOnnxConfig, LlamaOnnxConfig, MPTOnnxConfig
 from optimum.exporters.tasks import TasksManager
 from optimum.utils import DEFAULT_DUMMY_SHAPES
 from optimum.utils.input_generators import (
     DummyInputGenerator,
     DummyPastKeyValuesGenerator,
     DummyTextInputGenerator,
+    FalconDummyPastKeyValuesGenerator,
     MistralDummyPastKeyValuesGenerator,
 )
 from optimum.utils.normalized_config import NormalizedTextConfig
@@ -454,3 +455,50 @@ def patch_model_for_export(
         self, model: Union["PreTrainedModel", "TFPreTrainedModel"], model_kwargs: Optional[Dict[str, Any]] = None
     ) -> "ModelPatcher":
         return MPTModelPatcher(self, model, model_kwargs=model_kwargs)
+
+
+class OVFalconDummyPastKeyValuesGenerator(FalconDummyPastKeyValuesGenerator):
+    def __init__(
+        self,
+        task: str,
+        normalized_config: NormalizedTextConfig,
+        batch_size: int = DEFAULT_DUMMY_SHAPES["batch_size"],
+        sequence_length: int = DEFAULT_DUMMY_SHAPES["sequence_length"],
+        random_batch_size_range: Optional[Tuple[int, int]] = None,
+        random_sequence_length_range: Optional[Tuple[int, int]] = None,
+        **kwargs,
+    ):
+        super().__init__(
+            task=task,
+            normalized_config=normalized_config,
+            batch_size=batch_size,
+            sequence_length=sequence_length,
+            random_batch_size_range=random_batch_size_range,
+            random_sequence_length_range=random_sequence_length_range,
+            **kwargs,
+        )
+        if normalized_config.new_decoder_architecture:
+            self.num_kv_heads = normalized_config.num_attention_heads
+        else:
+            self.num_kv_heads = normalized_config.num_kv_heads if not normalized_config.multi_query else 1
+
+        self.head_dim = self.hidden_size // self.num_attention_heads
+
+
+@register_in_tasks_manager(
+    "falcon",
+    *[
+        "feature-extraction",
+        "feature-extraction-with-past",
+        "question-answering",
+        "text-generation",
+        "text-generation-with-past",
+        "token-classification",
+    ],
+    library_name="transformers",
+)
+class FalconOpenVINOConfig(FalconOnnxConfig):
+    DUMMY_INPUT_GENERATOR_CLASSES = (
+        OVFalconDummyPastKeyValuesGenerator,
+    ) + TextDecoderOnnxConfig.DUMMY_INPUT_GENERATOR_CLASSES
+    DUMMY_PKV_GENERATOR_CLASS = OVFalconDummyPastKeyValuesGenerator
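
The branch in OVFalconDummyPastKeyValuesGenerator.__init__ encodes how Falcon checkpoints lay out their key/value heads: the new decoder architecture uses one KV head per attention head, while older checkpoints use either a single shared KV head (multi-query attention) or the configured num_kv_heads. A standalone sketch of that selection logic, using a hypothetical FakeFalconConfig stand-in for the normalized config (the Falcon-7B-style numbers are illustrative):

from dataclasses import dataclass


@dataclass
class FakeFalconConfig:
    # Hypothetical stand-in for the normalized transformers config.
    new_decoder_architecture: bool
    multi_query: bool
    num_attention_heads: int
    num_kv_heads: int
    hidden_size: int


def kv_heads_and_head_dim(cfg: FakeFalconConfig):
    if cfg.new_decoder_architecture:
        # New-style Falcon: one KV head per attention head.
        num_kv_heads = cfg.num_attention_heads
    else:
        # Old-style Falcon: one shared KV head under multi-query attention,
        # otherwise the configured number of KV heads.
        num_kv_heads = 1 if cfg.multi_query else cfg.num_kv_heads
    head_dim = cfg.hidden_size // cfg.num_attention_heads
    return num_kv_heads, head_dim


# Multi-query, old decoder layout (Falcon-7B-style numbers, illustrative):
print(kv_heads_and_head_dim(FakeFalconConfig(False, True, 71, 71, 4544)))  # (1, 64)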

optimum/intel/generation/modeling.py (+9)

@@ -180,13 +180,22 @@ def _reorder_cache(
         """
         if self.config.model_type == "bloom":
             return self._reorder_cache_bloom(past_key_values, beam_idx)
+        elif self.config.model_type == "gpt_bigcode":
+            return self._reorder_cache_gpt_bigcode(past_key_values, beam_idx)
 
         # from transformers.models.gpt2.modeling_gpt2.GPT2LMHeadModel._reorder_cache
         return tuple(
             tuple(past_state.index_select(0, beam_idx.to(past_state.device)) for past_state in layer_past)
             for layer_past in past_key_values
         )
 
+    # Copied from transformers.models.gpt_bigcode.modeling_gpt_bigcode.GPTBigCodeForCausalLM._reorder_cache
+    @staticmethod
+    def _reorder_cache_gpt_bigcode(
+        past_key_values: Tuple[Tuple[torch.Tensor]], beam_idx: torch.Tensor
+    ) -> Tuple[Tuple[torch.Tensor]]:
+        return tuple(layer_past.index_select(0, beam_idx.to(layer_past.device)) for layer_past in past_key_values)
+
     # Copied from transformers.models.bloom.modeling_bloom.BloomForCausalLM._reorder_cache
     def _reorder_cache_bloom(
         self, past_key_values: Tuple[Tuple[torch.Tensor]], beam_idx: torch.Tensor
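
The new elif branch exists because GPTBigCode's multi-query attention stores the past as a single fused tensor per layer, while GPT-2-style models store a (key, value) tuple of tensors per layer; beam search therefore reorders the two layouts differently. A toy illustration of the logic above (shapes are illustrative, not the models' exact ones):

import torch

beam_idx = torch.tensor([2, 0, 1])  # beams permuted by beam search

# GPT-2-style cache: per layer, a (key, value) tuple whose tensors have
# the beam dimension first.
gpt2_past = tuple((torch.randn(3, 4, 5, 8), torch.randn(3, 4, 5, 8)) for _ in range(2))
gpt2_reordered = tuple(
    tuple(t.index_select(0, beam_idx) for t in layer_past) for layer_past in gpt2_past
)

# GPTBigCode-style cache: one fused tensor per layer, reordered directly.
bigcode_past = tuple(torch.randn(3, 5, 16) for _ in range(2))
bigcode_reordered = tuple(t.index_select(0, beam_idx) for t in bigcode_past)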

optimum/intel/openvino/configuration.py (+3 -1)

@@ -310,7 +310,9 @@ def __init__(
         if isinstance(quantization_config, dict):
             quantization_config = self._quantization_config_from_dict(quantization_config)
         self.quantization_config = quantization_config
-        self.compression = None  # A field for backward-compatibility of training-time compression parameters
+        self.compression = kwargs.get(
+            "compression", None
+        )  # A field for backward-compatibility of training-time compression parameters
         bits = self.quantization_config.bits if self.quantization_config else None
         self.dtype = "int" + str(bits) if isinstance(bits, int) else dtype
 

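With this change, a legacy training-time compression dict passed through **kwargs survives on the OVConfig object instead of always being reset to None. A hedged sketch of the restored behavior (the dict contents here are illustrative only):

from optimum.intel import OVConfig

legacy_compression = {"algorithm": "quantization"}  # illustrative contents
ov_config = OVConfig(compression=legacy_compression)
assert ov_config.compression == legacy_compression
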
optimum/intel/openvino/modeling_seq2seq.py (+2 -2)

@@ -224,7 +224,7 @@
 
 ```python
 >>> from transformers import {processor_class}
->>> from optimum.intel.openvino import {model_class}
+>>> from optimum.intel import {model_class}
 >>> from datasets import load_dataset
 
 >>> processor = {processor_class}.from_pretrained("{checkpoint}")
@@ -241,7 +241,7 @@
 
 ```python
 >>> from transformers import {processor_class}, pipeline
->>> from optimum.intel.openvino import {model_class}
+>>> from optimum.intel import {model_class}
 >>> from datasets import load_dataset
 
 >>> processor = {processor_class}.from_pretrained("{checkpoint}")
