
Commit 6815773

Merge branch 'main' into cli-awq

2 parents e8cc0e9 + f06f504


42 files changed: +1191 −209 lines

.github/workflows/test_inc.yml (+1 −2)

@@ -32,7 +32,7 @@ jobs:
 python -m pip install --upgrade pip
 pip install cmake
 pip install py-cpuinfo
-pip install torch==2.2.2 torchvision==0.17.2 torchaudio==2.2.2 --index-url https://download.pytorch.org/whl/cpu
+pip install torch==2.3.0 torchaudio==2.3.0 torchvision==0.18 --index-url https://download.pytorch.org/whl/cpu
 pip install .[neural-compressor,diffusers,tests]
 pip install intel-extension-for-transformers
 pip install peft
@@ -43,7 +43,6 @@ jobs:
 - name: Test IPEX
 run: |
 pip uninstall -y intel-extension-for-transformers
-pip install torch==2.3.0 torchaudio==2.3.0 torchvision==0.18 --extra-index-url https://download.pytorch.org/whl/cpu
 pip install intel-extension-for-pytorch==2.3.0
 pytest tests/neural_compressor/test_ipex.py

.github/workflows/test_ipex.yml (+2 −0)

@@ -18,6 +18,7 @@ jobs:
 fail-fast: false
 matrix:
 python-version: [3.8, 3.9]
+transformers-version: [4.39.0, 4.41.2]
 os: [ubuntu-latest]

 runs-on: ${{ matrix.os }}
@@ -32,6 +33,7 @@ jobs:
 python -m pip install --upgrade pip
 pip install torch torchaudio torchvision --extra-index-url https://download.pytorch.org/whl/cpu
 pip install .[ipex,tests]
+pip install transformers==${{ matrix.transformers-version }}
 - name: Test with Pytest
 run: |
 pytest tests/ipex/
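
A quick way to confirm which pin from the new matrix actually landed in a given job's environment is an interpreter check; a minimal sketch (the expected values come from the matrix above, nothing else is assumed):

```python
# Sanity check for the CI matrix above: print which transformers release
# is installed (expected 4.39.0 or 4.41.2, depending on the matrix entry).
import transformers

print(transformers.__version__)
```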

.github/workflows/test_openvino.yml (+4 −3)

@@ -17,14 +17,14 @@ jobs:
 strategy:
 fail-fast: false
 matrix:
-python-version: [3.8, 3.11]
+python-version: ["3.8", "3.12"]
 os: [ubuntu-latest]

 runs-on: ${{ matrix.os }}
 steps:
-- uses: actions/checkout@v2
+- uses: actions/checkout@v4
 - name: Setup Python ${{ matrix.python-version }}
-uses: actions/setup-python@v2
+uses: actions/setup-python@v5
 with:
 python-version: ${{ matrix.python-version }}
 - name: Install dependencies
@@ -46,3 +46,4 @@ jobs:
 pip install openvino-nightly
 python -c "from optimum.intel import OVModelForCausalLM; OVModelForCausalLM.from_pretrained('hf-internal-testing/tiny-random-gpt2', export=True, compile=False)"
 optimum-cli export openvino -m hf-internal-testing/tiny-random-gpt2 gpt2-ov
+
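
For context, the `python -c` one-liner above is the workflow's export smoke test; spelled out as a script it reads as follows (same call and arguments as the workflow, no additions):

```python
from optimum.intel import OVModelForCausalLM

# Export a tiny test checkpoint to OpenVINO on the fly; compile=False skips
# device compilation, so only the export path itself is exercised.
model = OVModelForCausalLM.from_pretrained(
    "hf-internal-testing/tiny-random-gpt2",
    export=True,
    compile=False,
)
```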

.github/workflows/test_openvino_basic.yml (+13 −8)

@@ -24,16 +24,16 @@ jobs:
 matrix:
 # Testing lower and upper bound of supported Python versions
 # This also ensures that the test fails if dependencies break for Python 3.7
-python-version: ["3.8", "3.11"]
-transformers: ['transformers']
+python-version: ["3.8", "3.12"]
 optimum: ['optimum', 'git+https://github.com/huggingface/optimum.git']
+os: ["ubuntu-22.04", "windows-latest"]

-runs-on: ubuntu-20.04
+runs-on: ${{ matrix.os }}

 steps:
-- uses: actions/checkout@v2
+- uses: actions/checkout@v4
 - name: Setup Python ${{ matrix.python-version }}
-uses: actions/setup-python@v2
+uses: actions/setup-python@v5
 with:
 python-version: ${{ matrix.python-version }}

@@ -43,12 +43,17 @@ jobs:
 # optimum or transformers to a specific version
 # Install PyTorch CPU to prevent unnecessary downloading/installing of CUDA packages
 pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
-pip install .[tests] openvino onnx onnxruntime ${{ matrix.optimum}} ${{ matrix.transformers }}
+pip install .[tests] openvino onnxruntime ${{ matrix.optimum}}

-- name: Pip freeze
+- name: Pip freeze
 run: pip freeze

 - name: Test with Pytest
 run: |
 pytest tests/openvino/test_modeling_basic.py
-RUN_SLOW=1 pytest tests/openvino/test_modeling.py -s -m "run_slow" --durations=0
+
+- name: Slow tests
+run: |
+pytest tests/openvino/test_modeling.py -s -m "run_slow" --durations=0
+env:
+RUN_SLOW: 1
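
For context on the `-m "run_slow"` filter used above: pytest selects only tests carrying that marker, while the `env:` block exports `RUN_SLOW=1` for the step. A hedged sketch of a test gated this way (the marker usage and test name are illustrative, not part of this commit):

```python
import os

import pytest

# Hypothetical slow test selected by `pytest -m "run_slow"`. It additionally
# skips itself unless the workflow exported RUN_SLOW=1 for this step.
@pytest.mark.run_slow
@pytest.mark.skipif(os.environ.get("RUN_SLOW") != "1", reason="set RUN_SLOW=1 to run")
def test_modeling_slow_path():
    ...
```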

.github/workflows/test_openvino_examples.yml (+1 −1)

@@ -22,7 +22,7 @@ jobs:
 strategy:
 fail-fast: false
 matrix:
-python-version: ["3.8", "3.11"]
+python-version: ["3.8", "3.12"]

 runs-on: ubuntu-22.04

.github/workflows/test_openvino_notebooks.yml (+1 −1)

@@ -23,7 +23,7 @@ jobs:
 strategy:
 fail-fast: false
 matrix:
-python-version: ["3.8", "3.11"]
+python-version: ["3.8", "3.12"]

 runs-on: ubuntu-22.04

README.md (+5 −0)

@@ -239,3 +239,8 @@ Do not forget to install requirements for every example:
 cd <example-folder>
 pip install -r requirements.txt
 ```
+
+
+## Gaudi
+
+To train your model on [Intel Gaudi AI Accelerators (HPU)](https://docs.habana.ai/en/latest/index.html), check out [Optimum Habana](https://github.com/huggingface/optimum-habana), which provides a set of tools enabling easy model loading, training and inference on single- and multi-HPU settings for different downstream tasks. After training your model, feel free to submit it to the Intel [leaderboard](https://huggingface.co/spaces/Intel/powered_by_intel_llm_leaderboard), which is designed to evaluate, score, and rank open-source LLMs that have been pre-trained or fine-tuned on Intel hardware. Models submitted to the leaderboard will be evaluated on the Intel Developer Cloud. The evaluation platform consists of Gaudi Accelerators and Xeon CPUs running benchmarks from the Eleuther AI Language Model Evaluation Harness.

docs/source/inference.mdx (+14 −4)

@@ -28,8 +28,12 @@ As shown in the table below, each task is associated with a class enabling to automatically load your model.
 | `image-classification` | `OVModelForImageClassification` |
 | `feature-extraction` | `OVModelForFeatureExtraction` |
 | `fill-mask` | `OVModelForMaskedLM` |
-| `text-generation` | `OVModelForCausalLM` |
-| `text2text-generation` | `OVModelForSeq2SeqLM` |
+| `image-classification` | `OVModelForImageClassification` |
+| `audio-classification` | `OVModelForAudioClassification` |
+| `text-generation-with-past` | `OVModelForCausalLM` |
+| `text2text-generation-with-past` | `OVModelForSeq2SeqLM` |
+| `automatic-speech-recognition` | `OVModelForSpeechSeq2Seq` |
+| `image-to-text` | `OVModelForVision2Seq` |


 ### Export
@@ -42,14 +46,20 @@ optimum-cli export openvino --model gpt2 ov_model

 The example above illustrates exporting a checkpoint from the 🤗 Hub. When exporting a local model, first make sure that you saved both the model's weights and tokenizer files in the same directory (`local_path`).
 When using the CLI, pass the `local_path` to the model argument instead of the checkpoint name of the model hosted on the Hub and provide the `--task` argument. You can review the list of supported tasks in the 🤗 [Optimum documentation](https://huggingface.co/docs/optimum/exporters/task_manager). If the task argument is not provided, it will default to the model architecture without any task-specific head.
-Here we set the `task` to `text-generation-with-past`, with the `-with-past` suffix enabling the re-use of the pre-computed key/values hidden-states `use_cache=True`.
+The `-with-past` suffix enables the re-use of the pre-computed key/values hidden-states and is the recommended option. To export the model without them (equivalent to `use_cache=False`), remove this suffix.

 ```bash
 optimum-cli export openvino --model local_path --task text-generation-with-past ov_model
 ```

 To export your model in fp16, you can add `--weight-format fp16` when exporting your model.

+<Tip warning={true}>
+
+Models larger than 1 billion parameters are exported to the OpenVINO format with 8-bit weights by default. You can disable it with `--weight-format fp32`.
+
+</Tip>
+
 Once the model is exported, you can load the OpenVINO model using:

 ```python
@@ -126,7 +136,7 @@ model = OVModelForCausalLM.from_pretrained(model_id, load_in_8bit=True)

 <Tip warning={true}>

-`load_in_8bit` is enabled by default for the models larger than 1 billion parameters. You can disable it with `load_in_8bit=False`.
+If not specified, `load_in_8bit` will be set to `True` by default when models larger than 1 billion parameters are exported to the OpenVINO format (with `export=True`). You can disable it with `load_in_8bit=False`.

 </Tip>
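
To make the two defaults described in the documentation changes above concrete, here is a minimal sketch combining them (assuming the OpenVINO extras of optimum-intel are installed; `ov_model` is just a local output directory):

```python
from optimum.intel import OVModelForCausalLM

# export=True converts the checkpoint to the OpenVINO format on the fly;
# load_in_8bit=False opts out of the 8-bit weight compression that would
# otherwise apply by default to models larger than 1 billion parameters.
model = OVModelForCausalLM.from_pretrained("gpt2", export=True, load_in_8bit=False)
model.save_pretrained("ov_model")
```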

docs/source/optimization_ov.mdx (+1 −1)

@@ -44,7 +44,7 @@ model.save_pretrained(saving_directory)

 <Tip warning={true}>

-`load_in_8bit` is enabled by default for the models larger than 1 billion parameters. You can disable it with `load_in_8bit=False`.
+If not specified, `load_in_8bit` will be set to `True` by default when models larger than 1 billion parameters are exported to the OpenVINO format (with `export=True`). You can disable it with `load_in_8bit=False`.

 </Tip>

docs/source/reference_ov.mdx (+74 −17)

@@ -14,56 +14,113 @@ See the License for the specific language governing permissions and
 limitations under the License.
 -->

-# Reference
+# Models

-## OVModelForFeatureExtraction
+## Natural Language Processing

-[[autodoc]] openvino.modeling.OVModelForFeatureExtraction
+The following classes are available for the following natural language processing tasks.
+
+### OVModelForCausalLM
+
+[[autodoc]] openvino.modeling_decoder.OVModelForCausalLM
+    - forward
+    - generate

-## OVModelForMaskedLM
+### OVModelForMaskedLM

 [[autodoc]] openvino.modeling.OVModelForMaskedLM
+    - forward
+
+### OVModelForSeq2SeqLM
+
+[[autodoc]] openvino.modeling_seq2seq.OVModelForSeq2SeqLM
+    - forward

-## OVModelForQuestionAnswering
+### OVModelForQuestionAnswering

 [[autodoc]] openvino.modeling.OVModelForQuestionAnswering
+    - forward

-## OVModelForSequenceClassification
+### OVModelForSequenceClassification

 [[autodoc]] openvino.modeling.OVModelForSequenceClassification
+    - forward

-## OVModelForTokenClassification
+### OVModelForTokenClassification

 [[autodoc]] openvino.modeling.OVModelForTokenClassification
+    - forward

-## OVModelForAudioClassification
+
+## Audio
+
+The following classes are available for the following audio tasks.
+
+### OVModelForAudioClassification

 [[autodoc]] openvino.modeling.OVModelForAudioClassification
+    - forward

-## OVModelForAudioFrameClassification
+### OVModelForAudioFrameClassification

 [[autodoc]] openvino.modeling.OVModelForAudioFrameClassification
+    - forward

-## OVModelForCTC
+### OVModelForCTC

 [[autodoc]] openvino.modeling.OVModelForCTC
+    - forward

-## OVModelForAudioXVector
+### OVModelForAudioXVector

 [[autodoc]] openvino.modeling.OVModelForAudioXVector
+    - forward
+
+### OVModelForSpeechSeq2Seq
+
+[[autodoc]] openvino.modeling_seq2seq.OVModelForSpeechSeq2Seq
+    - forward
+
+
+## Computer Vision

-## OVModelForImageClassification
+The following classes are available for the following computer vision tasks.
+
+### OVModelForImageClassification

 [[autodoc]] openvino.modeling.OVModelForImageClassification
+    - forward

-## OVModelForCausalLM

-[[autodoc]] openvino.modeling_decoder.OVModelForCausalLM
+## Multimodal

-## OVModelForSeq2SeqLM
+The following classes are available for the following multimodal tasks.

-[[autodoc]] openvino.modeling_seq2seq.OVModelForSeq2SeqLM
+### OVModelForVision2Seq
+
+[[autodoc]] openvino.modeling_seq2seq.OVModelForVision2Seq
+    - forward
+
+### OVModelForPix2Struct
+
+[[autodoc]] openvino.modeling_seq2seq.OVModelForPix2Struct
+    - forward
+
+## Custom Tasks
+
+### OVModelForCustomTasks
+
+[[autodoc]] openvino.modeling.OVModelForCustomTasks
+    - forward
+
+### OVModelForFeatureExtraction
+
+[[autodoc]] openvino.modeling.OVModelForFeatureExtraction
+    - forward
+
+
+# Quantization

-## OVQuantizer
+### OVQuantizer

 [[autodoc]] openvino.quantization.OVQuantizer

examples/openvino/image-classification/configs/swin-base-jpqd.json (−2)

@@ -36,8 +36,6 @@
 "ignored_scopes": [
 "{re}.*__add___[0-1]",
 "{re}.*layer_norm_0",
-"{re}.*matmul_1",
-"{re}.*__truediv__*"
 ]
 }
 ]

examples/openvino/question-answering/configs/bert-base-jpqd.json (−2)

@@ -36,8 +36,6 @@
 "ignored_scopes": [
 "{re}.*__add___[0-1]",
 "{re}.*layer_norm_0",
-"{re}.*matmul_1",
-"{re}.*__truediv__*"
 ]
 }
 ]

examples/openvino/text-classification/configs/bert-base-jpqd.json (−2)

@@ -40,8 +40,6 @@
 "ignored_scopes": [
 "{re}.*__add___[0-1]",
 "{re}.*layer_norm_0",
-"{re}.*matmul_1",
-"{re}.*__truediv__*"
 ]
 }
 ]

notebooks/openvino/quantized_generation_demo.ipynb (+1 −1)

@@ -32,7 +32,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"# ! pip install optimum[openvino,nncf] torch"
+"# ! pip install optimum[openvino,nncf] torch==2.2.2"
 ]
 },
 {

optimum/exporters/ipex/model_patcher.py (+7 −4)

@@ -23,6 +23,7 @@
 from optimum.intel.utils.import_utils import is_ipex_version

 from .modeling_utils import (
+    _IPEX_MINIMUM_VERSION_FOR_PATCHING,
     _IPEXLlamaDecoderLayerRef,
     _llama_attn_forward,
     _llama_layer_norm_forward,
@@ -62,18 +63,20 @@ def patch_op(m, target_m, new_op_name, new_op):


 def _patch_llama_model(model):
-    if is_ipex_version("<", "2.5.0"):
-        raise ImportError("Only ipex version > 2.3.0 supports RotaryEmbedding and IndirectAccessKVCache")
+    if is_ipex_version("<", _IPEX_MINIMUM_VERSION_FOR_PATCHING):
+        raise ImportError(
+            f"Only ipex version >= {_IPEX_MINIMUM_VERSION_FOR_PATCHING} supports RotaryEmbedding and IndirectAccessKVCacheAttention"
+        )

-    from intel_extension_for_pytorch.llm.modules import IndirectAccessKVCache, RotaryEmbedding
+    from intel_extension_for_pytorch.llm.modules import IndirectAccessKVCacheAttention, RotaryEmbedding

     ipex_rope = RotaryEmbedding(
         model.config.max_position_embeddings,
         model.config.hidden_size // model.config.num_attention_heads,
         model.config.rope_theta,
         model.config.architectures[0],
     )
-    ipex_scale_dot_product = IndirectAccessKVCache(text_max_length=model.config.max_position_embeddings)
+    ipex_scale_dot_product = IndirectAccessKVCacheAttention(text_max_length=model.config.max_position_embeddings)
     patch_op(model, LlamaAttention, "ipex_rope", ipex_rope)
     patch_op(model, LlamaAttention, "ipex_scale_dot_product", ipex_scale_dot_product)
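
The guard introduced above uses `is_ipex_version` to compare the installed IPEX release against a minimum before patching. A minimal sketch of the same pattern (the hard-coded `"2.3.0"` and the `require_min_ipex` helper are illustrative assumptions; the commit imports the real constant from `.modeling_utils`):

```python
from optimum.intel.utils.import_utils import is_ipex_version

# Illustrative stand-in for the constant imported from .modeling_utils.
_IPEX_MINIMUM_VERSION_FOR_PATCHING = "2.3.0"  # assumed value


def require_min_ipex(feature: str) -> None:
    # Hypothetical helper mirroring the guard in _patch_llama_model: refuse
    # to patch when the installed IPEX predates the minimum version.
    if is_ipex_version("<", _IPEX_MINIMUM_VERSION_FOR_PATCHING):
        raise ImportError(
            f"Only ipex version >= {_IPEX_MINIMUM_VERSION_FOR_PATCHING} supports {feature}"
        )


require_min_ipex("RotaryEmbedding and IndirectAccessKVCacheAttention")
```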
