
Commit d2350c0 (merge of 2 parents: ca46673 + 096d94b)

resolve conflict


42 files changed: +2508, -393 lines

.github/workflows/test_inc.yml (+2, -3)

@@ -32,7 +32,7 @@ jobs:
           python -m pip install --upgrade pip
           pip install cmake
           pip install py-cpuinfo
-          pip install torch==2.2 torchaudio torchvision --extra-index-url https://download.pytorch.org/whl/cpu
+          pip install torch==2.3.0 torchaudio==2.3.0 torchvision==0.18 --index-url https://download.pytorch.org/whl/cpu
           pip install .[neural-compressor,diffusers,tests]
           pip install intel-extension-for-transformers
           pip install peft
@@ -43,7 +43,6 @@ jobs:
       - name: Test IPEX
         run: |
           pip uninstall -y intel-extension-for-transformers
-          pip install torch==2.1.0 torchaudio==2.1.0 torchvision==0.16 --extra-index-url https://download.pytorch.org/whl/cpu
-          pip install intel-extension-for-pytorch==2.1.100
+          pip install intel-extension-for-pytorch==2.3.0
           pytest tests/neural_compressor/test_ipex.py
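Both pins now move to 2.3.0 together: intel-extension-for-pytorch releases track the torch minor version, which is why the old job had to reinstall torch 2.1.0 before installing IPEX 2.1.100. A small sanity check in that spirit (not part of this commit; it assumes only that both packages expose `__version__`):

```python
# Sketch: verify torch and IPEX share a minor version before running tests.
import torch
import intel_extension_for_pytorch as ipex

def minor(version: str) -> tuple:
    """'2.3.0+cpu' -> (2, 3): strip any local tag, keep major.minor."""
    return tuple(int(part) for part in version.split("+")[0].split(".")[:2])

assert minor(torch.__version__) == minor(ipex.__version__), (
    f"torch {torch.__version__} and intel-extension-for-pytorch "
    f"{ipex.__version__} must share a minor version"
)
```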

.github/workflows/test_ipex.yml (+1, -1)

@@ -30,7 +30,7 @@ jobs:
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install torch==2.2 torchaudio torchvision --extra-index-url https://download.pytorch.org/whl/cpu
+          pip install torch torchaudio torchvision --extra-index-url https://download.pytorch.org/whl/cpu
           pip install .[ipex,tests]
       - name: Test with Pytest
         run: |

README.md (+5)

@@ -239,3 +239,8 @@ Do not forget to install requirements for every example:
 cd <example-folder>
 pip install -r requirements.txt
 ```
+
+
+## Gaudi
+
+To train your model on [Intel Gaudi AI Accelerators (HPU)](https://docs.habana.ai/en/latest/index.html), check out [Optimum Habana](https://github.com/huggingface/optimum-habana), which provides a set of tools enabling easy model loading, training and inference on single- and multi-HPU settings for different downstream tasks. After training your model, feel free to submit it to the Intel [leaderboard](https://huggingface.co/spaces/Intel/powered_by_intel_llm_leaderboard), which is designed to evaluate, score, and rank open-source LLMs that have been pre-trained or fine-tuned on Intel hardware. Models submitted to the leaderboard are evaluated on the Intel Developer Cloud. The evaluation platform consists of Gaudi Accelerators and Xeon CPUs running benchmarks from the Eleuther AI Language Model Evaluation Harness.
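For orientation, a minimal fine-tuning sketch against the Optimum Habana API mentioned above; the checkpoint, toy dataset, and `gaudi_config_name` are illustrative placeholders, not taken from this commit:

```python
from datasets import Dataset
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from optimum.habana import GaudiTrainer, GaudiTrainingArguments

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased")

# Toy two-example dataset so the sketch is self-contained.
train_dataset = Dataset.from_dict({"text": ["great", "awful"], "label": [1, 0]})
train_dataset = train_dataset.map(
    lambda batch: tokenizer(batch["text"], padding="max_length", truncation=True)
)

args = GaudiTrainingArguments(
    output_dir="out",
    use_habana=True,     # run on HPU instead of CPU/GPU
    use_lazy_mode=True,  # Habana lazy-mode graph execution
    gaudi_config_name="Habana/bert-base-uncased",  # Gaudi config published on the Hub
)
GaudiTrainer(model=model, args=args, train_dataset=train_dataset).train()
```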

examples/openvino/image-classification/configs/swin-base-jpqd.json (-2)

@@ -36,8 +36,6 @@
     "ignored_scopes": [
       "{re}.*__add___[0-1]",
       "{re}.*layer_norm_0",
-      "{re}.*matmul_1",
-      "{re}.*__truediv__*"
     ]
   }
 ]

examples/openvino/question-answering/configs/bert-base-jpqd.json (-2)

@@ -36,8 +36,6 @@
     "ignored_scopes": [
       "{re}.*__add___[0-1]",
       "{re}.*layer_norm_0",
-      "{re}.*matmul_1",
-      "{re}.*__truediv__*"
     ]
   }
 ]

examples/openvino/text-classification/configs/bert-base-jpqd.json (-2)

@@ -40,8 +40,6 @@
     "ignored_scopes": [
       "{re}.*__add___[0-1]",
       "{re}.*layer_norm_0",
-      "{re}.*matmul_1",
-      "{re}.*__truediv__*"
     ]
   }
 ]

notebooks/openvino/quantized_generation_demo.ipynb (+1, -1)

@@ -32,7 +32,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# ! pip install optimum[openvino,nncf] torch"
+    "# ! pip install optimum[openvino,nncf] torch==2.2.2"
    ]
   },
   {

notebooks/openvino/stable_diffusion_hybrid_quantization.ipynb (+10, -4)

@@ -52,7 +52,8 @@
    "import transformers\n",
    "from pathlib import Path\n",
    "from openvino.runtime import Core\n",
-    "from optimum.intel import OVStableDiffusionPipeline, OVWeightQuantizationConfig\n",
+    "from optimum.intel import OVConfig, OVQuantizer, OVStableDiffusionPipeline, OVWeightQuantizationConfig\n",
+    "from optimum.intel.openvino.configuration import OVQuantizationMethod\n",
    "\n",
    "transformers.logging.set_verbosity_error()\n",
    "datasets.logging.set_verbosity_error()"
@@ -198,9 +199,14 @@
    },
    "outputs": [],
    "source": [
-    "quantization_config = OVWeightQuantizationConfig(bits=8, dataset=calibration_dataset, num_samples=NUM_SAMPLES)\n",
-    "int8_pipe = OVStableDiffusionPipeline.from_pretrained(model_id=MODEL_ID, export=True, quantization_config=quantization_config)\n",
-    "int8_pipe.save_pretrained(int8_model_path)"
+    "int8_pipe = OVStableDiffusionPipeline.from_pretrained(model_id=MODEL_ID, export=True)\n",
+    "quantization_config = OVWeightQuantizationConfig(bits=8, num_samples=NUM_SAMPLES, quant_method=OVQuantizationMethod.HYBRID)\n",
+    "quantizer = OVQuantizer(int8_pipe)\n",
+    "quantizer.quantize(\n",
+    "    ov_config=OVConfig(quantization_config=quantization_config),\n",
+    "    calibration_dataset=calibration_dataset,\n",
+    "    save_directory=int8_model_path\n",
+    ")"
    ]
   },
   {
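The change moves hybrid quantization from a `from_pretrained`-time side effect to an explicit `OVQuantizer` call: the calibration data no longer rides inside `OVWeightQuantizationConfig`, but is handed to `quantize()` directly, and hybrid mode is selected via `quant_method=OVQuantizationMethod.HYBRID`. Condensed into one sketch (`MODEL_ID`, `NUM_SAMPLES`, `calibration_dataset`, and `int8_model_path` are prepared earlier in the notebook):

```python
from optimum.intel import OVConfig, OVQuantizer, OVStableDiffusionPipeline, OVWeightQuantizationConfig
from optimum.intel.openvino.configuration import OVQuantizationMethod

# Export first, quantize second (previously both happened inside from_pretrained).
int8_pipe = OVStableDiffusionPipeline.from_pretrained(model_id=MODEL_ID, export=True)

quantization_config = OVWeightQuantizationConfig(
    bits=8,
    num_samples=NUM_SAMPLES,
    quant_method=OVQuantizationMethod.HYBRID,  # hybrid: weights plus selected activations
)
OVQuantizer(int8_pipe).quantize(
    ov_config=OVConfig(quantization_config=quantization_config),
    calibration_dataset=calibration_dataset,  # prompt-based set built earlier in the notebook
    save_directory=int8_model_path,
)
```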

optimum/commands/export/openvino.py (+28, -3)

@@ -119,6 +119,15 @@ def parse_args_openvino(parser: "ArgumentParser"):
             "or ['conceptual_captions','laion/220k-GPT4Vision-captions-from-LIVIS','laion/filtered-wit'] for diffusion models."
         ),
     )
+    optional_group.add_argument(
+        "--all-layers",
+        action="store_true",
+        default=None,
+        help=(
+            "Whether embeddings and last MatMul layers should be compressed to INT4. If not provided and weight "
+            "compression is applied, they are compressed to INT8."
+        ),
+    )
     optional_group.add_argument(
         "--disable-stateful",
         action="store_true",
@@ -198,6 +207,7 @@ def run(self):
             and self.args.ratio is None
             and self.args.group_size is None
             and self.args.sym is None
+            and self.args.all_layers is None
             and self.args.model in _DEFAULT_4BIT_CONFIGS
         ):
             quantization_config = _DEFAULT_4BIT_CONFIGS[self.args.model]
@@ -207,6 +217,7 @@ def run(self):
                 "ratio": 1 if is_int8 else (self.args.ratio or 0.8),
                 "sym": self.args.sym or False,
                 "group_size": -1 if is_int8 else self.args.group_size,
+                "all_layers": None if is_int8 else self.args.all_layers,
             }

         if self.args.weight_format in {"int4_sym_g128", "int4_asym_g128", "int4_sym_g64", "int4_asym_g64"}:
@@ -226,6 +237,9 @@ def run(self):
             )
             library_name = "transformers"

+        if self.args.convert_tokenizer:
+            logger.warning("`--convert-tokenizer` option is deprecated. Tokenizer will be converted by default.")
+
         if (
             library_name == "diffusers"
             and ov_config
@@ -261,10 +275,21 @@ def run(self):
             )
             model.save_pretrained(self.args.output)

-        else:
-            if self.args.convert_tokenizer:
-                logger.warning("`--convert-tokenizer` option is deprecated. Tokenizer will be converted by default.")
+            if self.args.disable_convert_tokenizer:
+                return
+
+            # avoid import when using other exporters (IPEX, INC)
+            from ...exporters.openvino.convert import export_tokenizer

+            output = Path(self.args.output)
+            tokenizer = getattr(model, "tokenizer", None)
+            if tokenizer is not None:
+                export_tokenizer(tokenizer, output / "tokenizer")
+
+            tokenizer_2 = getattr(model, "tokenizer_2", None)
+            if tokenizer_2 is not None:
+                export_tokenizer(tokenizer_2, output / "tokenizer_2")
+        else:
             # TODO : add input shapes
             main_export(
                 model_name_or_path=self.args.model,
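The new `--all-layers` flag feeds the `all_layers` key of the weight-compression config assembled in `run()` above, so the same switch is reachable from the Python API through `OVWeightQuantizationConfig`. A sketch under that assumption, with a placeholder model and output path:

```python
from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig

# Roughly mirrors `optimum-cli export openvino --weight-format int4 --all-layers ...`
quantization_config = OVWeightQuantizationConfig(
    bits=4,
    ratio=0.8,        # CLI default for int4 when --ratio is not given (see diff above)
    sym=False,        # CLI default when --sym is not given
    group_size=128,   # a common int4 group size; placeholder here
    all_layers=True,  # also push embeddings and the last MatMul down to INT4
)
model = OVModelForCausalLM.from_pretrained(
    "gpt2",           # placeholder checkpoint
    export=True,
    quantization_config=quantization_config,
)
model.save_pretrained("gpt2-int4-ov")
```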

optimum/exporters/openvino/__main__.py (+11, -3)

@@ -24,7 +24,7 @@
 from optimum.exporters import TasksManager
 from optimum.exporters.onnx.base import OnnxConfig
 from optimum.exporters.onnx.constants import SDPA_ARCHS_ONNX_EXPORT_NOT_SUPPORTED
-from optimum.exporters.openvino.convert import export_from_model, export_tokenizer
+from optimum.exporters.openvino.convert import export_from_model
 from optimum.intel.utils.import_utils import is_openvino_tokenizers_available, is_transformers_version
 from optimum.utils.save_utils import maybe_load_preprocessors

@@ -219,6 +219,10 @@ def main_export(
     model_type = config.model_type.replace("_", "-")
     if model_type not in TasksManager._SUPPORTED_MODEL_TYPE:
         custom_architecture = True
+        if custom_export_configs is None:
+            raise ValueError(
+                f"Trying to export a {model_type} model, that is a custom or unsupported architecture, but no custom export configuration was passed as `custom_export_configs`. Please refer to https://huggingface.co/docs/optimum/main/en/exporters/onnx/usage_guides/export_a_model#custom-export-of-transformers-models for an example on how to export custom models. Please open an issue at https://github.com/huggingface/optimum-intel/issues if you would like the model type {model_type} to be supported natively in the OpenVINO export."
+            )
     elif task not in TasksManager.get_supported_tasks_for_model_type(
         model_type, exporter="openvino", library_name=library_name
     ):
@@ -232,6 +236,7 @@ def main_export(
         raise ValueError(
             f"Asked to export a {model_type} model for the task {task}{autodetected_message}, but the Optimum OpenVINO exporter only supports the tasks {', '.join(model_tasks.keys())} for {model_type}. Please use a supported task. Please open an issue at https://github.com/huggingface/optimum/issues if you would like the task {task} to be supported in the ONNX export for {model_type}."
         )
+
     if is_transformers_version(">=", "4.36") and model_type in SDPA_ARCHS_ONNX_EXPORT_NOT_SUPPORTED:
         loading_kwargs["attn_implementation"] = "eager"
     # there are some difference between remote and in library representation of past key values for some models,
@@ -355,6 +360,9 @@ class StoreAttr(object):
         **kwargs_shapes,
     )

+    # hide openvino import when using other exporters
+    from optimum.exporters.openvino.convert import export_tokenizer
+
     if convert_tokenizer and is_openvino_tokenizers_available():
         if library_name != "diffusers":
             tokenizer = next(
@@ -373,11 +381,11 @@ class StoreAttr(object):
         else:
             tokenizer = getattr(model, "tokenizer", None)
             if tokenizer is not None:
-                export_tokenizer(tokenizer, output)
+                export_tokenizer(tokenizer, output / "tokenizer")

             tokenizer_2 = getattr(model, "tokenizer_2", None)
             if tokenizer_2 is not None:
-                export_tokenizer(tokenizer_2, output, suffix="_2")
+                export_tokenizer(tokenizer_2, output / "tokenizer_2")
     elif convert_tokenizer and not is_openvino_tokenizers_available():
         logger.warning("Tokenizer won't be converted.")
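The net effect of the last hunk is a layout change: converted tokenizers now land in dedicated `tokenizer/` and `tokenizer_2/` subfolders instead of sharing the export root with a `_2` suffix. A sketch of calling the helper directly (placeholder checkpoint; `export_tokenizer` returns silently when `openvino-tokenizers` is not installed, per `convert.py` below):

```python
from pathlib import Path
from transformers import AutoTokenizer
from optimum.exporters.openvino.convert import export_tokenizer

output = Path("exported-model")
tokenizer = AutoTokenizer.from_pretrained("gpt2")  # placeholder checkpoint

export_tokenizer(tokenizer, output / "tokenizer")  # was: export_tokenizer(tokenizer, output)
# A diffusion pipeline's second tokenizer now gets its own folder too:
# export_tokenizer(tokenizer_2, output / "tokenizer_2")  # was: suffix="_2"
```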

optimum/exporters/openvino/convert.py (+13, -7)

@@ -547,7 +547,7 @@ def export_from_model(
     # TODO: support onnx_config.py in the model repo
     if custom_architecture and custom_export_configs is None:
         raise ValueError(
-            f"Trying to export a {model_type} model, that is a custom or unsupported architecture, but no custom export configuration was passed as `custom_export_configs`. Please refer to https://huggingface.co/docs/optimum/main/en/exporters/onnx/usage_guides/export_a_model#custom-export-of-transformers-models for an example on how to export custom models. Please open an issue at https://github.com/huggingface/optimum/issues if you would like the model type {model_type} to be supported natively in the ONNX export."
+            f"Trying to export a {model_type} model, that is a custom or unsupported architecture, but no custom export configuration was passed as `custom_export_configs`. Please refer to https://huggingface.co/docs/optimum/main/en/exporters/onnx/usage_guides/export_a_model#custom-export-of-transformers-models for an example on how to export custom models. Please open an issue at https://github.com/huggingface/optimum-intel/issues if you would like the model type {model_type} to be supported natively in the OpenVINO export."
         )

     if task.startswith("text-generation") and model.config.is_encoder_decoder:
@@ -614,7 +614,12 @@ def export_from_model(
     model.config.save_pretrained(output)
     generation_config = getattr(model, "generation_config", None)
     if generation_config is not None:
-        generation_config.save_pretrained(output)
+        try:
+            generation_config.save_pretrained(output)
+        except Exception as exception:
+            logger.warning(
+                f"The generation config will not be saved, saving failed with following error:\n{exception}"
+            )

     model_name_or_path = model.config._name_or_path
     maybe_save_preprocessors(model_name_or_path, output, trust_remote_code=trust_remote_code)
@@ -667,20 +672,21 @@
     output: Union[str, Path],
     suffix: Optional[str] = "",
 ):
-    from optimum.intel.openvino import OV_DETOKENIZER_NAME, OV_TOKENIZER_NAME  # avoid circular imports
+    # avoid circular imports
+    from optimum.intel.openvino import OV_DETOKENIZER_NAME, OV_TOKENIZER_NAME
+    from optimum.intel.openvino.utils import maybe_convert_tokenizer_to_fast

     try:
         from openvino_tokenizers import convert_tokenizer
     except ModuleNotFoundError:
-        # avoid this message before tokenizers are part of the openvino dependencies
-        # logger.info(
-        #     "Run `pip install openvino-tokenizers[transformers]` to get OpenVINO tokenizer/detokenizer models."
-        # )
         return

     if not isinstance(output, Path):
         output = Path(output)

+    if output.exists():
+        tokenizer = maybe_convert_tokenizer_to_fast(tokenizer, output)
+
     try:
         converted = convert_tokenizer(tokenizer, with_detokenizer=True)
     except NotImplementedError: