
Commit 374b1fc

Merged with main

2 parents 55a673b + 0f45751

19 files changed (+510 −405 lines)

.github/workflows/test_ipex.yml

+1
```diff
@@ -30,6 +30,7 @@ jobs:
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
+          pip install torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/cpu
           pip install .[ipex,tests]
       - name: Test with Pytest
         run: |
```
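Pinning CPU-only wheels from the PyTorch CPU index keeps the CI runner from pulling the much larger CUDA builds. As a quick sanity check (a hypothetical snippet, not part of the commit), the installed build can be verified like so:

```python
import torch

# The CPU index URL should yield a "+cpu" build with no CUDA support.
print(torch.__version__)          # e.g. "2.1.2+cpu"
print(torch.cuda.is_available())  # expected: False on the CPU-only wheel
```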

README.md

+2 −2

````diff
@@ -78,10 +78,10 @@ It is possible to export your model to the [OpenVINO](https://docs.openvino.ai/2
 optimum-cli export openvino --model gpt2 ov_model
 ```
 
-If you add `--int8`, the model linear and embedding weights will be quantized to INT8, the activations will be kept in floating point precision.
+You can also apply 8-bit weight-only quantization when exporting your model: the model linear and embedding weights will be quantized to INT8, while the activations will be kept in floating point precision.
 
 ```plain
-optimum-cli export openvino --model gpt2 --int8 ov_model
+optimum-cli export openvino --model gpt2 --weight-format int8 ov_model
 ```
 
 To apply quantization on both weights and activations, you can find more information in the [documentation](https://huggingface.co/docs/optimum/main/en/intel/optimization_ov).
````
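For context, a model exported this way loads back through the OpenVINO model classes. A minimal sketch, assuming the `gpt2` export above was written to `ov_model`:

```python
from optimum.intel import OVModelForCausalLM
from transformers import AutoTokenizer

# Load the OpenVINO IR produced by `optimum-cli export openvino ...`;
# the INT8 weights live on disk, activations run in floating point.
model = OVModelForCausalLM.from_pretrained("ov_model")
tokenizer = AutoTokenizer.from_pretrained("gpt2")

inputs = tokenizer("Hello, my name is", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=10)
print(tokenizer.decode(outputs[0]))
```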

optimum/exporters/openvino/stateful.py

+1 −4

```diff
@@ -22,7 +22,6 @@
 from openvino.runtime import opset13
 from optimum.exporters import TasksManager
 from optimum.intel.utils.import_utils import _openvino_version, is_openvino_version
-from optimum.utils.normalized_config import NormalizedConfigManager
 
 
 def model_has_state(ov_model: ov.Model):
@@ -217,9 +216,7 @@ def patch_stateful(config: PretrainedConfig, ov_model: ov.Model):
     batch_dim = 1 if config.model_type == "chatglm" else 0
 
     fuse_cache_reorder(ov_model, not_kv_inputs, key_value_input_names, batch_dim)
-
-    normalized_config = NormalizedConfigManager.get_normalized_config_class(config.model_type)(config)
-    num_attention_heads = normalized_config.num_attention_heads if config.model_type == "bloom" else 1
+    num_attention_heads = config.num_attention_heads if config.model_type == "bloom" else 1
     make_stateful(
         ov_model, not_kv_inputs, key_value_input_names, key_value_output_names, batch_dim, num_attention_heads, None
     )
```
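The simplification works because `transformers` config classes alias attribute names: `BloomConfig` stores the head count as `n_head` but exposes it as `num_attention_heads` through its `attribute_map`, so the `NormalizedConfigManager` indirection is unnecessary here. A small illustration (a hypothetical check, not from the commit):

```python
from transformers import AutoConfig

config = AutoConfig.from_pretrained("bigscience/bloom-560m")
# BloomConfig's attribute_map aliases `num_attention_heads` to its
# native `n_head` field, so it can be read directly off the config.
print(config.model_type)           # "bloom"
print(config.num_attention_heads)  # same value as config.n_head
```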

optimum/intel/__init__.py

+6 −1

```diff
@@ -48,9 +48,11 @@
         "IPEXModelForMaskedLM",
         "IPEXModelForTokenClassification",
         "IPEXModelForQuestionAnswering",
+        "IPEXModelForImageClassification",
+        "IPEXModelForAudioClassification",
+        "IPEXModel",
     ]
 
-
 try:
     if not (is_openvino_available() and is_nncf_available()):
         raise OptionalDependencyNotAvailable()
@@ -162,7 +164,10 @@
         from .utils.dummy_ipex_objects import *
     else:
         from .ipex import (
+            IPEXModel,
+            IPEXModelForAudioClassification,
             IPEXModelForCausalLM,
+            IPEXModelForImageClassification,
             IPEXModelForMaskedLM,
             IPEXModelForQuestionAnswering,
             IPEXModelForSequenceClassification,
```
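With these exports in place, the new classes become importable from the top-level package. A usage sketch, assuming the IPEX `from_pretrained(..., export=True)` path used by the existing classes (the checkpoint name is illustrative):

```python
from optimum.intel import IPEXModelForImageClassification

# export=True traces the PyTorch model with IPEX optimizations on load.
model = IPEXModelForImageClassification.from_pretrained(
    "google/vit-base-patch16-224", export=True
)
```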

optimum/intel/generation/modeling.py

+1 −3

```diff
@@ -66,13 +66,11 @@ def prepare_jit_inputs(model: PreTrainedModel, task: str, use_cache: bool = False):
 
 def jit_trace(model: PreTrainedModel, task: str, use_cache: bool = False):
     model_inputs = prepare_jit_inputs(model, task, use_cache)
-    model.config.return_dict = False
+    model.config.return_dict = task not in {"text-generation", "audio-classification"}
     # check if the model_inputs is correct.
     model(**model_inputs)
 
     torch._C._jit_set_texpr_fuser_enabled(False)
-    if "past_key_values" in model_inputs.keys():
-        model.config.return_dict = False
     if is_torch_version(">=", "2.1.0"):
         traced_model = torch.jit.trace(model, example_kwarg_inputs=model_inputs, strict=False)
     else:
```
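The `return_dict` toggle matters because `torch.jit.trace` cannot handle `ModelOutput` dataclass returns; tuples trace cleanly, and plain dicts work with `strict=False`. A standalone sketch of the pattern, not the library code itself:

```python
import torch


class Toy(torch.nn.Module):
    def forward(self, input_ids):
        # A plain dict output, as a model with return_dict behavior produces.
        return {"logits": input_ids.float() * 2.0}


model = Toy().eval()
example = {"input_ids": torch.ones(1, 4, dtype=torch.long)}

# strict=False permits dict outputs; example_kwarg_inputs needs torch >= 2.0.
traced = torch.jit.trace(model, example_kwarg_inputs=example, strict=False)
traced = torch.jit.freeze(traced)
print(traced(**example)["logits"].shape)  # torch.Size([1, 4])
```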

optimum/intel/ipex/__init__.py

+3
```diff
@@ -1,5 +1,8 @@
 from optimum.intel.ipex.modeling_base import (
+    IPEXModel,
+    IPEXModelForAudioClassification,
     IPEXModelForCausalLM,
+    IPEXModelForImageClassification,
     IPEXModelForMaskedLM,
     IPEXModelForQuestionAnswering,
     IPEXModelForSequenceClassification,
```

optimum/intel/ipex/inference.py

+1 −19

```diff
@@ -31,25 +31,13 @@
     IPEXModelForMaskedLM,
     IPEXModelForSequenceClassification,
     IPEXModelForTokenClassification,
-    IPEXBloomForCausalLM,
-    IPEXMPTForCausalLM,
-    IPEXOPTForCausalLM,
-    IPEXGPTBigCodeForCausalLM,
     IPEXModelForQuestionAnswering,
 )
 
 
 from .utils import _HEAD_TO_AUTOMODELS
 
 
-_MODEL_TYPE_TO_AUTOMODELS = {
-    "bloom": IPEXBloomForCausalLM,
-    "mpt": IPEXMPTForCausalLM,
-    "opt": IPEXOPTForCausalLM,
-    "big_code": IPEXGPTBigCodeForCausalLM,
-}
-
-
 logger = logging.getLogger(__name__)
 
 IPEX_NOT_AVAILABLE_ERROR_MSG = (
@@ -146,13 +134,7 @@ def __enter__(self):
                 )
                 if task in _HEAD_TO_AUTOMODELS:
                     model = jit_trace(model, task, use_cache)
-                    model_type = getattr(self._original.config, "model_type", "").replace("_", "-")
-
-                    if task == "text-generation" and model_type in _MODEL_TYPE_TO_AUTOMODELS.keys():
-                        auto_model_class = _MODEL_TYPE_TO_AUTOMODELS[task]
-                    else:
-                        auto_model_class = eval(_HEAD_TO_AUTOMODELS[task])
-
+                    auto_model_class = eval(_HEAD_TO_AUTOMODELS[task])
                     model = auto_model_class(model, self._original.config, use_cache=use_cache)
 
         # Enable automatic mixed precision (AMP) if we are going to target `bfloat16`
```
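With the per-model-type table gone, every task routes through the generic `_HEAD_TO_AUTOMODELS` lookup inside the `inference_mode` context manager these imports feed. A usage sketch (arguments beyond the pipeline follow the documented API; the checkpoint name is illustrative):

```python
import torch
from transformers import pipeline
from optimum.intel import inference_mode

pipe = pipeline(
    "text-classification",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)

# Inside the context, the wrapped pipeline runs the jit-traced IPEX model.
with inference_mode(pipe, dtype=torch.bfloat16, jit=True) as opt_pipe:
    print(opt_pipe("This library makes Intel CPUs fly."))
```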
