Skip to content

Commit 6056612

Browse files
authored
Merge branch 'huggingface:main' into pipeline
2 parents 9e8ce0e + 920b237 commit 6056612

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

64 files changed

+1917
-1019
lines changed

.github/workflows/test_inc.yml

+11-4
Original file line numberDiff line numberDiff line change
@@ -32,11 +32,18 @@ jobs:
3232
python -m pip install --upgrade pip
3333
pip install cmake
3434
pip install py-cpuinfo
35-
pip install torch==2.1.0 torchaudio==2.1.0 torchvision==0.16 --extra-index-url https://download.pytorch.org/whl/cpu
35+
pip install torch==2.2 torchaudio torchvision --extra-index-url https://download.pytorch.org/whl/cpu
3636
pip install .[neural-compressor,diffusers,tests]
37-
pip install intel-extension-for-pytorch==2.1.100
38-
pip install intel-extension-for-transformers==1.3.2
37+
pip install intel-extension-for-transformers
3938
pip install peft
39+
4040
- name: Test with Pytest
4141
run: |
42-
pytest tests/neural_compressor/
42+
pytest tests/neural_compressor/ --ignore tests/neural_compressor/test_ipex.py --durations=0
43+
- name: Test IPEX
44+
run: |
45+
pip uninstall -y intel-extension-for-transformers
46+
pip install torch==2.1.0 torchaudio==2.1.0 torchvision==0.16 --extra-index-url https://download.pytorch.org/whl/cpu
47+
pip install intel-extension-for-pytorch==2.1.100
48+
pytest tests/neural_compressor/test_ipex.py
49+

.github/workflows/test_ipex.yml

+1
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ jobs:
3030
- name: Install dependencies
3131
run: |
3232
python -m pip install --upgrade pip
33+
pip install torch==2.2 torchaudio torchvision --extra-index-url https://download.pytorch.org/whl/cpu
3334
pip install .[ipex,tests]
3435
- name: Test with Pytest
3536
run: |

.github/workflows/test_offline.yaml

+40
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
name: Offline usage / Python - Test
2+
3+
on:
4+
push:
5+
branches: [main]
6+
pull_request:
7+
branches: [main]
8+
9+
concurrency:
10+
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
11+
cancel-in-progress: true
12+
13+
jobs:
14+
build:
15+
strategy:
16+
fail-fast: false
17+
matrix:
18+
python-version: [3.9]
19+
os: [ubuntu-latest]
20+
21+
runs-on: ${{ matrix.os }}
22+
steps:
23+
- uses: actions/checkout@v3
24+
- name: Setup Python ${{ matrix.python-version }}
25+
uses: actions/setup-python@v3
26+
with:
27+
python-version: ${{ matrix.python-version }}
28+
- name: Install dependencies
29+
run: |
30+
pip install .[tests,openvino]
31+
- name: Test
32+
run: |
33+
HF_HOME=/tmp/ huggingface-cli download hf-internal-testing/tiny-random-gpt2
34+
HF_HOME=/tmp/ HF_HUB_OFFLINE=1 optimum-cli export openvino --model hf-internal-testing/tiny-random-gpt2 gpt2_openvino --task text-generation
35+
36+
huggingface-cli download hf-internal-testing/tiny-random-gpt2
37+
HF_HUB_OFFLINE=1 optimum-cli export openvino --model hf-internal-testing/tiny-random-gpt2 gpt2_openvino --task text-generation
38+
39+
pytest tests/openvino/test_modeling.py -k "test_load_from_hub" -s -vvvvv
40+
HF_HUB_OFFLINE=1 pytest tests/openvino/test_modeling.py -k "test_load_from_hub" -s -vvvvv

.github/workflows/test_openvino.yml

+5-1
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,11 @@ jobs:
3535
pip install .[openvino,openvino-tokenizers,tests,diffusers] onnxruntime
3636
- name: Test with Pytest
3737
run: |
38-
pytest tests/openvino/ --ignore test_modeling_basic --durations=0
38+
pytest tests/openvino/ --ignore tests/openvino/test_modeling_basic.py --durations=0
39+
- name: Test basic
40+
run: |
41+
pip uninstall -y nncf
42+
pytest tests/openvino/test_modeling_basic.py
3943
- name: Test openvino-nightly
4044
run: |
4145
pip uninstall -y openvino

.github/workflows/test_openvino_examples.yml

+10-10
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,11 @@ on:
77
push:
88
paths:
99
- '.github/workflows/test_openvino_examples.yml'
10-
- 'examples/openvino/*'
10+
- 'examples/openvino/**'
1111
pull_request:
1212
paths:
1313
- '.github/workflows/test_openvino_examples.yml'
14-
- 'examples/openvino/*'
14+
- 'examples/openvino/**'
1515

1616
concurrency:
1717
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
@@ -22,9 +22,9 @@ jobs:
2222
strategy:
2323
fail-fast: false
2424
matrix:
25-
python-version: ["3.8", "3.10"]
25+
python-version: ["3.8", "3.11"]
2626

27-
runs-on: ubuntu-20.04
27+
runs-on: ubuntu-22.04
2828

2929
steps:
3030
- uses: actions/checkout@v2
@@ -35,12 +35,12 @@ jobs:
3535

3636
- name: Install dependencies
3737
run: |
38-
pip install optimum[openvino] jstyleson nncf pytest
39-
pip install -r examples/openvino/audio-classification/requirements.txt
40-
pip install -r examples/openvino/image-classification/requirements.txt
41-
pip install -r examples/openvino/question-answering/requirements.txt
42-
pip install -r examples/openvino/text-classification/requirements.txt
38+
pip install .[openvino] jstyleson pytest
39+
pip install -r examples/openvino/audio-classification/requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu
40+
pip install -r examples/openvino/image-classification/requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu
41+
pip install -r examples/openvino/question-answering/requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu
42+
pip install -r examples/openvino/text-classification/requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu
4343
4444
- name: Test examples
4545
run: |
46-
python -m pytest examples/openvino/test_examples.py
46+
python -m pytest examples/openvino/test_examples.py

.github/workflows/test_openvino_notebooks.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,9 @@ jobs:
2323
strategy:
2424
fail-fast: false
2525
matrix:
26-
python-version: ["3.8", "3.10"]
26+
python-version: ["3.8", "3.11"]
2727

28-
runs-on: ubuntu-20.04
28+
runs-on: ubuntu-22.04
2929

3030
steps:
3131
- uses: actions/checkout@v2

README.md

+10-3
Original file line numberDiff line numberDiff line change
@@ -78,12 +78,18 @@ It is possible to export your model to the [OpenVINO IR](https://docs.openvino.a
7878
optimum-cli export openvino --model gpt2 ov_model
7979
```
8080

81-
You can also apply 8-bit weight-only quantization when exporting your model : the model linear and embedding weights will be quantized to INT8, the activations will be kept in floating point precision.
81+
You can also apply 8-bit weight-only quantization when exporting your model: the model's linear, embedding and convolution weights will be quantized to INT8, while the activations will be kept in floating point precision.
8282

8383
```plain
8484
optimum-cli export openvino --model gpt2 --weight-format int8 ov_model
8585
```
8686

87+
Quantization in hybrid mode can be applied to a Stable Diffusion pipeline during model export. This involves applying hybrid post-training quantization to the UNet model and weight-only quantization to the rest of the pipeline components. In hybrid mode, the weights of MatMul and Embedding layers are quantized, as well as the activations of the other layers.
88+
89+
```plain
90+
optimum-cli export openvino --model stabilityai/stable-diffusion-2-1 --dataset conceptual_captions --weight-format int8 ov_model
91+
```
92+
8793
To apply quantization on both weights and activations, you can find more information in the [documentation](https://huggingface.co/docs/optimum/main/en/intel/optimization_ov).
8894

8995
#### Inference:
@@ -122,7 +128,7 @@ Post-training static quantization introduces an additional calibration step wher
122128

123129
```python
124130
from functools import partial
125-
from optimum.intel import OVQuantizer, OVModelForSequenceClassification
131+
from optimum.intel import OVQuantizer, OVModelForSequenceClassification, OVConfig, OVQuantizationConfig
126132
from transformers import AutoTokenizer, AutoModelForSequenceClassification
127133

128134
model_id = "distilbert-base-uncased-finetuned-sst-2-english"
@@ -145,7 +151,8 @@ calibration_dataset = quantizer.get_calibration_dataset(
145151
# The directory where the quantized model will be saved
146152
save_dir = "nncf_results"
147153
# Apply static quantization and save the resulting model in the OpenVINO IR format
148-
quantizer.quantize(calibration_dataset=calibration_dataset, save_directory=save_dir)
154+
ov_config = OVConfig(quantization_config=OVQuantizationConfig())
155+
quantizer.quantize(ov_config=ov_config, calibration_dataset=calibration_dataset, save_directory=save_dir)
149156
# Load the quantized model
150157
optimized_model = OVModelForSequenceClassification.from_pretrained(save_dir)
151158
```

docs/source/optimization_ov.mdx

+3-2
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ Here is how to apply static quantization on a fine-tuned DistilBERT given your o
8484

8585
```python
8686
from transformers import AutoTokenizer
87-
from optimum.intel import OVQuantizer, OVModelForSequenceClassification,
87+
from optimum.intel import OVQuantizer, OVModelForSequenceClassification, OVConfig, OVQuantizationConfig
8888

8989
model_id = "distilbert-base-uncased-finetuned-sst-2-english"
9090
model = OVModelForSequenceClassification.from_pretrained(model_id, export=True)
@@ -95,7 +95,8 @@ save_dir = "ptq_model"
9595
quantizer = OVQuantizer.from_pretrained(model)
9696

9797
# Apply static quantization and export the resulting quantized model to OpenVINO IR format
98-
quantizer.quantize(calibration_dataset=calibration_dataset, save_directory=save_dir)
98+
ov_config = OVConfig(quantization_config=OVQuantizationConfig())
99+
quantizer.quantize(ov_config=ov_config, calibration_dataset=calibration_dataset, save_directory=save_dir)
99100
# Save the tokenizer
100101
tokenizer.save_pretrained(save_dir)
101102
```

examples/neural_compressor/language-modeling/run_clm.py

+25-27
Original file line numberDiff line numberDiff line change
@@ -57,15 +57,11 @@
5757
from transformers.utils.versions import require_version
5858

5959
from optimum.intel.neural_compressor import INCModelForCausalLM, INCQuantizer, INCTrainer
60-
from optimum.intel.utils.import_utils import (
61-
INTEL_EXTENSION_FOR_TRANSFORMERS_IMPORT_ERROR,
62-
is_intel_extension_for_transformers_available,
63-
)
64-
60+
from optimum.intel.utils.import_utils import ITREX_IMPORT_ERROR, is_itrex_available
6561

66-
if is_intel_extension_for_transformers_available():
67-
from intel_extension_for_transformers.transformers.utils.config import WeightOnlyQuantConfig
6862

63+
if is_itrex_available():
64+
from intel_extension_for_transformers.transformers.utils.config import GPTQConfig, RtnConfig
6965

7066
os.environ["CUDA_VISIBLE_DEVICES"] = ""
7167

@@ -227,8 +223,9 @@ class OptimizationArguments:
227223
metadata={"help": "Scheme for weight only quantization. Choose from 'sym' and 'asym'."},
228224
)
229225
quantization_methodology: str = field(
230-
default="RTN",
231-
metadata={"help": "Quantization methodology for weight only quantization. Choose from 'RTN' and 'GPTQ'."},
226+
choices=["rtn", "gptq"],
227+
default="rtn",
228+
metadata={"help": "Quantization methodology for weight only quantization. Choose from 'rtn' and 'gptq'."},
232229
)
233230
damp_percent: float = field(
234231
default=0.01,
@@ -658,26 +655,27 @@ def compute_metrics(eval_preds):
658655
else:
659656
recipes = {}
660657
if optim_args.quantization_approach == "weight_only":
661-
if not is_intel_extension_for_transformers_available():
662-
raise ImportError(INTEL_EXTENSION_FOR_TRANSFORMERS_IMPORT_ERROR.format("WeightOnly quantization"))
658+
if not is_itrex_available():
659+
raise ImportError(ITREX_IMPORT_ERROR.format("WeightOnly quantization"))
663660
if optim_args.apply_pruning or optim_args.apply_distillation:
664661
raise ValueError("Weight only quantization and pruning or distillation cannot be combined.")
665-
if optim_args.quantization_methodology == "GPTQ":
666-
algorithm_args = {
667-
"act_order": False,
668-
"percdamp": optim_args.damp_percent,
669-
"block_size": optim_args.gptq_block_size,
670-
"nsamples": optim_args.num_calibration_samples,
671-
"use_max_length": optim_args.use_max_length,
672-
"pad_max_length": optim_args.pad_max_length,
673-
}
674-
quantization_config = WeightOnlyQuantConfig(
675-
weight_dtype=optim_args.weight_dtype,
676-
group_size=optim_args.group_size,
677-
scheme=optim_args.weight_only_scheme,
678-
algorithm=optim_args.quantization_methodology,
679-
algorithm_args=algorithm_args if optim_args.quantization_methodology == "GPTQ" else None,
680-
)
662+
663+
algorithm_args = {
664+
"weight_dtype": optim_args.weight_dtype,
665+
"sym": optim_args.weight_only_scheme == "sym",
666+
"group_size": optim_args.group_size,
667+
}
668+
669+
if optim_args.quantization_methodology == "gptq":
670+
quantization_config = GPTQConfig(
671+
damp_percent=optim_args.damp_percent,
672+
nsamples=optim_args.num_calibration_samples,
673+
blocksize=optim_args.gptq_block_size,
674+
**algorithm_args,
675+
)
676+
else:
677+
quantization_config = RtnConfig(**algorithm_args)
678+
681679
else:
682680
quantization_config = PostTrainingQuantConfig(
683681
approach=optim_args.quantization_approach, recipes=recipes
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
datasets>=1.14.0
22
evaluate
33
librosa
4-
torchaudio
4+
torchaudio
5+
accelerate

examples/openvino/audio-classification/run_audio_classification.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
from transformers.utils import check_min_version, send_example_telemetry
3636
from transformers.utils.versions import require_version
3737

38-
from optimum.intel.openvino import OVConfig, OVTrainer, OVTrainingArguments
38+
from optimum.intel import OVConfig, OVTrainer, OVTrainingArguments
3939

4040

4141
logger = logging.getLogger(__name__)

examples/openvino/image-classification/requirements.txt

+1
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,4 @@ datasets >= 1.8.0
22
torch >= 1.9.0
33
torchvision>=0.6.0
44
evaluate
5+
accelerate

examples/openvino/image-classification/run_image_classification.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@
5252
from transformers.utils import check_min_version, send_example_telemetry
5353
from transformers.utils.versions import require_version
5454

55-
from optimum.intel.openvino import OVConfig, OVTrainer, OVTrainingArguments
55+
from optimum.intel import OVConfig, OVTrainer, OVTrainingArguments
5656

5757

5858
logger = logging.getLogger(__name__)
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
datasets >= 1.8.0
22
torch >= 1.9.0
33
evaluate
4+
accelerate

examples/openvino/question-answering/run_qa.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@
4949
from transformers.utils.versions import require_version
5050
from utils_qa import postprocess_qa_predictions
5151

52-
from optimum.intel.openvino import OVConfig, OVTrainingArguments
52+
from optimum.intel import OVConfig, OVTrainingArguments
5353

5454

5555
# Will error if the minimal version of Transformers is not installed. Remove at your own risk.

examples/openvino/question-answering/trainer_qa.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
import torch.nn.functional as F
2121
from transformers.trainer_utils import PredictionOutput
2222

23-
from optimum.intel.openvino.trainer import OVTrainer
23+
from optimum.intel import OVTrainer
2424

2525

2626
class QuestionAnsweringOVTrainer(OVTrainer):

examples/openvino/text-classification/requirements.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,5 @@ scipy
44
scikit-learn
55
protobuf
66
torch >= 1.3
7-
evaluate
7+
evaluate
8+
accelerate

examples/openvino/text-classification/run_glue.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@
4646
from transformers.utils import check_min_version, send_example_telemetry
4747
from transformers.utils.versions import require_version
4848

49-
from optimum.intel.openvino import OVConfig, OVTrainer, OVTrainingArguments
49+
from optimum.intel import OVConfig, OVTrainer, OVTrainingArguments
5050

5151

5252
# Will error if the minimal version of Transformers is not installed. Remove at your own risk.

0 commit comments

Comments
 (0)