Commit 9f072b2

Merge branch 'main' into ea/mpt_sdpa

2 parents: e60872c + 33fc7b7

24 files changed: +108 -43 lines

.github/workflows/test_openvino_examples.yml (+10 -10)

@@ -7,11 +7,11 @@ on:
   push:
     paths:
       - '.github/workflows/test_openvino_examples.yml'
-      - 'examples/openvino/*'
+      - 'examples/openvino/**'
   pull_request:
     paths:
       - '.github/workflows/test_openvino_examples.yml'
-      - 'examples/openvino/*'
+      - 'examples/openvino/**'
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
@@ -22,9 +22,9 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.8", "3.10"]
+        python-version: ["3.8", "3.11"]
 
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04
 
     steps:
       - uses: actions/checkout@v2
@@ -35,12 +35,12 @@ jobs:
 
       - name: Install dependencies
         run: |
-          pip install optimum[openvino] jstyleson nncf pytest
-          pip install -r examples/openvino/audio-classification/requirements.txt
-          pip install -r examples/openvino/image-classification/requirements.txt
-          pip install -r examples/openvino/question-answering/requirements.txt
-          pip install -r examples/openvino/text-classification/requirements.txt
+          pip install .[openvino] jstyleson pytest
+          pip install -r examples/openvino/audio-classification/requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu
+          pip install -r examples/openvino/image-classification/requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu
+          pip install -r examples/openvino/question-answering/requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu
+          pip install -r examples/openvino/text-classification/requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu
 
       - name: Test examples
         run: |
-          python -m pytest examples/openvino/test_examples.py
+          python -m pytest examples/openvino/test_examples.py

.github/workflows/test_openvino_notebooks.yml (+2 -2)

@@ -23,9 +23,9 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.8", "3.10"]
+        python-version: ["3.8", "3.11"]
 
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04
 
     steps:
       - uses: actions/checkout@v2
examples/openvino/audio-classification/requirements.txt (+2 -1)

@@ -1,4 +1,5 @@
 datasets>=1.14.0
 evaluate
 librosa
-torchaudio
+torchaudio
+accelerate

examples/openvino/audio-classification/run_audio_classification.py (+1 -1)

@@ -35,7 +35,7 @@
 from transformers.utils import check_min_version, send_example_telemetry
 from transformers.utils.versions import require_version
 
-from optimum.intel.openvino import OVConfig, OVTrainer, OVTrainingArguments
+from optimum.intel import OVConfig, OVTrainer, OVTrainingArguments
 
 
 logger = logging.getLogger(__name__)
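
The same one-line change recurs across the example scripts and notebooks in this commit: the OpenVINO classes are imported from the top-level optimum.intel namespace instead of the optimum.intel.openvino submodule. A minimal before/after sketch, using the class names from the diff above:

# Old import path, via the submodule:
# from optimum.intel.openvino import OVConfig, OVTrainer, OVTrainingArguments

# New import path used throughout this commit, via the package root:
from optimum.intel import OVConfig, OVTrainer, OVTrainingArguments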

examples/openvino/image-classification/requirements.txt (+1)

@@ -2,3 +2,4 @@ datasets >= 1.8.0
 torch >= 1.9.0
 torchvision>=0.6.0
 evaluate
+accelerate

examples/openvino/image-classification/run_image_classification.py (+1 -1)

@@ -52,7 +52,7 @@
 from transformers.utils import check_min_version, send_example_telemetry
 from transformers.utils.versions import require_version
 
-from optimum.intel.openvino import OVConfig, OVTrainer, OVTrainingArguments
+from optimum.intel import OVConfig, OVTrainer, OVTrainingArguments
 
 
 logger = logging.getLogger(__name__)
examples/openvino/question-answering/requirements.txt (+1)

@@ -1,3 +1,4 @@
 datasets >= 1.8.0
 torch >= 1.9.0
 evaluate
+accelerate

examples/openvino/question-answering/run_qa.py (+1 -1)

@@ -49,7 +49,7 @@
 from transformers.utils.versions import require_version
 from utils_qa import postprocess_qa_predictions
 
-from optimum.intel.openvino import OVConfig, OVTrainingArguments
+from optimum.intel import OVConfig, OVTrainingArguments
 
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.

examples/openvino/question-answering/trainer_qa.py (+1 -1)

@@ -20,7 +20,7 @@
 import torch.nn.functional as F
 from transformers.trainer_utils import PredictionOutput
 
-from optimum.intel.openvino.trainer import OVTrainer
+from optimum.intel import OVTrainer
 
 
 class QuestionAnsweringOVTrainer(OVTrainer):

examples/openvino/text-classification/requirements.txt (+2 -1)

@@ -4,4 +4,5 @@ scipy
 scikit-learn
 protobuf
 torch >= 1.3
-evaluate
+evaluate
+accelerate

examples/openvino/text-classification/run_glue.py (+1 -1)

@@ -46,7 +46,7 @@
 from transformers.utils import check_min_version, send_example_telemetry
 from transformers.utils.versions import require_version
 
-from optimum.intel.openvino import OVConfig, OVTrainer, OVTrainingArguments
+from optimum.intel import OVConfig, OVTrainer, OVTrainingArguments
 
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.

notebooks/openvino/optimum_openvino_inference.ipynb (+12 -12)

@@ -76,7 +76,7 @@
 }
 ],
 "source": [
-"from optimum.intel.openvino import OVModelForQuestionAnswering\n",
+"from optimum.intel import OVModelForQuestionAnswering\n",
 "\n",
 "# Load PyTorch model from the Hub and export to OpenVINO in the background\n",
 "model = OVModelForQuestionAnswering.from_pretrained(\"distilbert-base-uncased-distilled-squad\", export=True)\n",
@@ -182,7 +182,7 @@
 }
 ],
 "source": [
-"from optimum.intel.openvino import OVModelForQuestionAnswering\n",
+"from optimum.intel import OVModelForQuestionAnswering\n",
 "from transformers import AutoTokenizer, pipeline\n",
 "\n",
 "model = OVModelForQuestionAnswering.from_pretrained(\"distilbert-base-uncased-distilled-squad-ov-fp32\")\n",
@@ -240,7 +240,7 @@
 ],
 "source": [
 "import torch\n",
-"from optimum.intel.openvino import OVModelForQuestionAnswering\n",
+"from optimum.intel import OVModelForQuestionAnswering\n",
 "from transformers import AutoTokenizer, pipeline\n",
 "\n",
 "model = OVModelForQuestionAnswering.from_pretrained(\"distilbert-base-uncased-distilled-squad-ov-fp32\")\n",
@@ -324,7 +324,7 @@
 }
 ],
 "source": [
-"from optimum.intel.openvino import OVModelForQuestionAnswering\n",
+"from optimum.intel import OVModelForQuestionAnswering\n",
 "from transformers import AutoTokenizer, pipeline\n",
 "\n",
 "model = OVModelForQuestionAnswering.from_pretrained(\n",
@@ -529,7 +529,7 @@
 ],
 "source": [
 "from IPython.display import Audio\n",
-"from optimum.intel.openvino import OVModelForAudioClassification\n",
+"from optimum.intel import OVModelForAudioClassification\n",
 "from transformers import AutoFeatureExtractor, pipeline\n",
 "from datasets import load_dataset\n",
 "\n",
@@ -638,7 +638,7 @@
 }
 ],
 "source": [
-"from optimum.intel.openvino import OVModelForCausalLM\n",
+"from optimum.intel import OVModelForCausalLM\n",
 "from transformers import AutoTokenizer, pipeline\n",
 "\n",
 "model_id = \"helenai/gpt2-ov\"\n",
@@ -704,7 +704,7 @@
 ],
 "source": [
 "from IPython.display import Image\n",
-"from optimum.intel.openvino import OVModelForImageClassification\n",
+"from optimum.intel import OVModelForImageClassification\n",
 "from transformers import AutoImageProcessor, pipeline\n",
 "\n",
 "model_id = \"helenai/microsoft-swin-tiny-patch4-window7-224-ov\"\n",
@@ -766,7 +766,7 @@
 }
 ],
 "source": [
-"from optimum.intel.openvino import OVModelForMaskedLM\n",
+"from optimum.intel import OVModelForMaskedLM\n",
 "from transformers import AutoTokenizer, pipeline\n",
 "\n",
 "model_id = \"helenai/bert-base-uncased-ov\"\n",
@@ -835,7 +835,7 @@
 }
 ],
 "source": [
-"from optimum.intel.openvino import OVModelForQuestionAnswering\n",
+"from optimum.intel import OVModelForQuestionAnswering\n",
 "from transformers import AutoTokenizer, pipeline\n",
 "\n",
 "# Load the model and tokenizer saved in Part 1 of this notebook. Or use the line below to load them from the hub\n",
@@ -890,7 +890,7 @@
 }
 ],
 "source": [
-"from optimum.intel.openvino import OVModelForSeq2SeqLM\n",
+"from optimum.intel import OVModelForSeq2SeqLM\n",
 "from transformers import AutoTokenizer, pipeline\n",
 "\n",
 "model_id = \"helenai/t5-small-ov\"\n",
@@ -998,7 +998,7 @@
 }
 ],
 "source": [
-"from optimum.intel.openvino import OVModelForSequenceClassification\n",
+"from optimum.intel import OVModelForSequenceClassification\n",
 "from transformers import AutoTokenizer, pipeline\n",
 "\n",
 "model_id = \"helenai/papluca-xlm-roberta-base-language-detection-ov\"\n",
@@ -1047,7 +1047,7 @@
 }
 ],
 "source": [
-"from optimum.intel.openvino import OVModelForTokenClassification\n",
+"from optimum.intel import OVModelForTokenClassification\n",
 "from transformers import AutoTokenizer, pipeline\n",
 "\n",
 "model_id = \"helenai/dslim-bert-base-NER-ov-fp32\"\n",

notebooks/openvino/question_answering_quantization.ipynb (+2 -2)

@@ -51,7 +51,7 @@
 "import transformers\n",
 "from evaluate import evaluator\n",
 "from openvino.runtime import Core\n",
-"from optimum.intel.openvino import OVModelForQuestionAnswering, OVQuantizer, OVQuantizationConfig, OVConfig\n",
+"from optimum.intel import OVModelForQuestionAnswering, OVQuantizer, OVQuantizationConfig, OVConfig\n",
 "from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline\n",
 "\n",
 "transformers.logging.set_verbosity_error()\n",
@@ -286,7 +286,7 @@
 "**NOTE:** if you notice very low accuracy after post-training quantization, it is likely caused by an overflow issue which affects processors that do not contain VNNI (Vector Neural Network Instruction). NNCF has an `overflow_fix` option to address this. It will effectively use 7-bits for quantizing instead of 8-bits to prevent the overflow. To use this option, modify the code in the next cell to add an explicit quantization configuration, and set `overflow_fix` to `\"enable\"`:\n",
 "\n",
 "```\n",
-"from optimum.intel.openvino import OVConfig, OVQuantizationConfig\n",
+"from optimum.intel import OVConfig, OVQuantizationConfig\n",
 "\n",
 "ov_config = OVConfig(quantization_config=OVQuantizationConfig(overflow_fix=\"enable\"))\n",
 "quantizer = OVQuantizer.from_pretrained(model)\n",

notebooks/openvino/requirements.txt (+1 -1)

@@ -1,4 +1,4 @@
-optimum-intel[openvino, nncf]
+optimum-intel[openvino]
 datasets
 evaluate[evaluator]
 ipywidgets

notebooks/openvino/stable_diffusion_optimization.ipynb (+1 -1)

@@ -14,7 +14,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from optimum.intel.openvino import OVStableDiffusionPipeline\n",
+"from optimum.intel import OVStableDiffusionPipeline\n",
 "from diffusers.training_utils import set_seed\n",
 "from IPython.display import display"
 ]

optimum/exporters/openvino/model_configs.py (+49 -1)

@@ -19,13 +19,14 @@
 from transformers.utils import is_tf_available
 
 from optimum.exporters.onnx.config import TextDecoderOnnxConfig, TextDecoderWithPositionIdsOnnxConfig
-from optimum.exporters.onnx.model_configs import GemmaOnnxConfig, LlamaOnnxConfig, MPTOnnxConfig
+from optimum.exporters.onnx.model_configs import FalconOnnxConfig, GemmaOnnxConfig, LlamaOnnxConfig, MPTOnnxConfig
 from optimum.exporters.tasks import TasksManager
 from optimum.utils import DEFAULT_DUMMY_SHAPES
 from optimum.utils.input_generators import (
     DummyInputGenerator,
     DummyPastKeyValuesGenerator,
     DummyTextInputGenerator,
+    FalconDummyPastKeyValuesGenerator,
     MistralDummyPastKeyValuesGenerator,
 )
 from optimum.utils.normalized_config import NormalizedTextConfig
@@ -454,3 +455,50 @@ def patch_model_for_export(
         self, model: Union["PreTrainedModel", "TFPreTrainedModel"], model_kwargs: Optional[Dict[str, Any]] = None
     ) -> "ModelPatcher":
         return MPTModelPatcher(self, model, model_kwargs=model_kwargs)
+
+
+class OVFalconDummyPastKeyValuesGenerator(FalconDummyPastKeyValuesGenerator):
+    def __init__(
+        self,
+        task: str,
+        normalized_config: NormalizedTextConfig,
+        batch_size: int = DEFAULT_DUMMY_SHAPES["batch_size"],
+        sequence_length: int = DEFAULT_DUMMY_SHAPES["sequence_length"],
+        random_batch_size_range: Optional[Tuple[int, int]] = None,
+        random_sequence_length_range: Optional[Tuple[int, int]] = None,
+        **kwargs,
+    ):
+        super().__init__(
+            task=task,
+            normalized_config=normalized_config,
+            batch_size=batch_size,
+            sequence_length=sequence_length,
+            random_batch_size_range=random_batch_size_range,
+            random_sequence_length_range=random_sequence_length_range,
+            **kwargs,
+        )
+        if normalized_config.new_decoder_architecture:
+            self.num_kv_heads = normalized_config.num_attention_heads
+        else:
+            self.num_kv_heads = normalized_config.num_kv_heads if not normalized_config.multi_query else 1
+
+        self.head_dim = self.hidden_size // self.num_attention_heads
+
+
+@register_in_tasks_manager(
+    "falcon",
+    *[
+        "feature-extraction",
+        "feature-extraction-with-past",
+        "question-answering",
+        "text-generation",
+        "text-generation-with-past",
+        "token-classification",
+    ],
+    library_name="transformers",
+)
+class FalconOpenVINOConfig(FalconOnnxConfig):
+    DUMMY_INPUT_GENERATOR_CLASSES = (
+        OVFalconDummyPastKeyValuesGenerator,
+    ) + TextDecoderOnnxConfig.DUMMY_INPUT_GENERATOR_CLASSES
+    DUMMY_PKV_GENERATOR_CLASS = OVFalconDummyPastKeyValuesGenerator
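
The branch in OVFalconDummyPastKeyValuesGenerator.__init__ encodes how Falcon checkpoints lay out their key/value heads: the new decoder architecture uses one KV head per attention head, while older checkpoints use either a single shared KV head (multi-query attention) or the configured num_kv_heads. A standalone sketch of that selection logic, using a hypothetical FakeFalconConfig stand-in for the normalized config (the Falcon-7B-style numbers are illustrative):

from dataclasses import dataclass


@dataclass
class FakeFalconConfig:
    # Hypothetical stand-in for the normalized transformers config.
    new_decoder_architecture: bool
    multi_query: bool
    num_attention_heads: int
    num_kv_heads: int
    hidden_size: int


def kv_heads_and_head_dim(cfg: FakeFalconConfig):
    if cfg.new_decoder_architecture:
        # New-style Falcon: one KV head per attention head.
        num_kv_heads = cfg.num_attention_heads
    else:
        # Old-style Falcon: one shared KV head under multi-query attention,
        # otherwise the configured number of KV heads.
        num_kv_heads = 1 if cfg.multi_query else cfg.num_kv_heads
    head_dim = cfg.hidden_size // cfg.num_attention_heads
    return num_kv_heads, head_dim


# Multi-query, old decoder layout (Falcon-7B-style numbers, illustrative):
print(kv_heads_and_head_dim(FakeFalconConfig(False, True, 71, 71, 4544)))  # (1, 64)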

optimum/intel/generation/modeling.py (+9)

@@ -180,13 +180,22 @@ def _reorder_cache(
         """
         if self.config.model_type == "bloom":
             return self._reorder_cache_bloom(past_key_values, beam_idx)
+        elif self.config.model_type == "gpt_bigcode":
+            return self._reorder_cache_gpt_bigcode(past_key_values, beam_idx)
 
         # from transformers.models.gpt2.modeling_gpt2.GPT2LMHeadModel._reorder_cache
         return tuple(
             tuple(past_state.index_select(0, beam_idx.to(past_state.device)) for past_state in layer_past)
             for layer_past in past_key_values
         )
 
+    # Copied from transformers.models.gpt_bigcode.modeling_gpt_bigcode.GPTBigCodeForCausalLM._reorder_cache
+    @staticmethod
+    def _reorder_cache_gpt_bigcode(
+        past_key_values: Tuple[Tuple[torch.Tensor]], beam_idx: torch.Tensor
+    ) -> Tuple[Tuple[torch.Tensor]]:
+        return tuple(layer_past.index_select(0, beam_idx.to(layer_past.device)) for layer_past in past_key_values)
+
     # Copied from transformers.models.bloom.modeling_bloom.BloomForCausalLM._reorder_cache
     def _reorder_cache_bloom(
         self, past_key_values: Tuple[Tuple[torch.Tensor]], beam_idx: torch.Tensor
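
The new elif branch exists because GPTBigCode's multi-query attention stores the past as a single fused tensor per layer, while GPT-2-style models store a (key, value) tuple of tensors per layer; beam search therefore reorders the two layouts differently. A toy illustration of the logic above (shapes are illustrative, not the models' exact ones):

import torch

beam_idx = torch.tensor([2, 0, 1])  # beams permuted by beam search

# GPT-2-style cache: per layer, a (key, value) tuple whose tensors have
# the beam dimension first.
gpt2_past = tuple((torch.randn(3, 4, 5, 8), torch.randn(3, 4, 5, 8)) for _ in range(2))
gpt2_reordered = tuple(
    tuple(t.index_select(0, beam_idx) for t in layer_past) for layer_past in gpt2_past
)

# GPTBigCode-style cache: one fused tensor per layer, reordered directly.
bigcode_past = tuple(torch.randn(3, 5, 16) for _ in range(2))
bigcode_reordered = tuple(t.index_select(0, beam_idx) for t in bigcode_past)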

optimum/intel/openvino/configuration.py (+3 -1)

@@ -310,7 +310,9 @@ def __init__(
         if isinstance(quantization_config, dict):
             quantization_config = self._quantization_config_from_dict(quantization_config)
         self.quantization_config = quantization_config
-        self.compression = None  # A field for backward-compatibility of training-time compression parameters
+        self.compression = kwargs.get(
+            "compression", None
+        )  # A field for backward-compatibility of training-time compression parameters
         bits = self.quantization_config.bits if self.quantization_config else None
         self.dtype = "int" + str(bits) if isinstance(bits, int) else dtype
 

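With this change, a legacy training-time compression dict passed through **kwargs survives on the OVConfig object instead of always being reset to None. A hedged sketch of the restored behavior (the dict contents here are illustrative only):

from optimum.intel import OVConfig

legacy_compression = {"algorithm": "quantization"}  # illustrative contents
ov_config = OVConfig(compression=legacy_compression)
assert ov_config.compression == legacy_compression
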
optimum/intel/openvino/modeling_seq2seq.py (+2 -2)

@@ -224,7 +224,7 @@
 
 ```python
 >>> from transformers import {processor_class}
->>> from optimum.intel.openvino import {model_class}
+>>> from optimum.intel import {model_class}
 >>> from datasets import load_dataset
 
 >>> processor = {processor_class}.from_pretrained("{checkpoint}")
@@ -241,7 +241,7 @@
 
 ```python
 >>> from transformers import {processor_class}, pipeline
->>> from optimum.intel.openvino import {model_class}
+>>> from optimum.intel import {model_class}
 >>> from datasets import load_dataset
 
 >>> processor = {processor_class}.from_pretrained("{checkpoint}")
