
Commit f431f7b

Merge branch 'test' of https://github.com/NoushNabi/optimum-intel into test

2 parents: 4c98dc4 + 444860d

24 files changed: +742 -117 lines

.github/workflows/test_openvino.yml (+1 -1)

```diff
@@ -32,7 +32,7 @@ jobs:
           python -m pip install --upgrade pip
           # install PyTorch CPU version to avoid installing CUDA packages on GitHub runner without GPU
           pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
-          pip install .[openvino,openvino-tokenizers,nncf,tests,diffusers]
+          pip install .[openvino,openvino-tokenizers,tests,diffusers] onnxruntime
       - name: Test with Pytest
         run: |
           pytest tests/openvino/ --ignore test_modeling_basic
```

docs/source/optimization_ov.mdx (+11 -1)

````diff
@@ -82,7 +82,17 @@ from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig
 
 model = OVModelForCausalLM.from_pretrained(
     model_id,
-    export=True,
+    quantization_config=OVWeightQuantizationConfig(bits=4),
+)
+```
+
+You can tune the quantization parameters to achieve a better performance/accuracy trade-off as follows:
+
+```python
+from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig
+
+model = OVModelForCausalLM.from_pretrained(
+    model_id,
     quantization_config=OVWeightQuantizationConfig(bits=4, sym=False, ratio=0.8, dataset="ptb"),
 )
 ```
````
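For orientation, the sketch below exercises the documented API end to end. It is an illustration, not part of the commit: the checkpoint name and output directory are placeholders, and the exact behavior follows the updated docs above.

```python
# A minimal sketch of 4-bit weight-only quantization with optimum-intel.
# Assumptions: "gpt2" and "gpt2-ov-int4" are placeholder names.
from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig

model = OVModelForCausalLM.from_pretrained(
    "gpt2",
    quantization_config=OVWeightQuantizationConfig(bits=4),
)
model.save_pretrained("gpt2-ov-int4")  # persist the quantized OpenVINO IR for reuse
```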

examples/openvino/image-classification/run_image_classification.py (+8 -10)

```diff
@@ -151,12 +151,12 @@ class ModelArguments:
         metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
     )
     feature_extractor_name: str = field(default=None, metadata={"help": "Name or path of preprocessor config."})
-    use_auth_token: bool = field(
-        default=False,
+    token: str = field(
+        default=None,
         metadata={
             "help": (
-                "Will use the token generated when running `huggingface-cli login` (necessary to use this script "
-                "with private models)."
+                "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
+                "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
             )
         },
     )
@@ -239,8 +239,7 @@ def main():
             data_args.dataset_name,
             data_args.dataset_config_name,
             cache_dir=model_args.cache_dir,
-            task="image-classification",
-            use_auth_token=True if model_args.use_auth_token else None,
+            token=model_args.token,
         )
     else:
         data_files = {}
@@ -252,7 +251,6 @@ def main():
             "imagefolder",
             data_files=data_files,
             cache_dir=model_args.cache_dir,
-            task="image-classification",
         )
 
     # If we don't have a validation split, split off a percentage of train as validation.
@@ -287,15 +285,15 @@ def compute_metrics(p):
         finetuning_task="image-classification",
         cache_dir=model_args.cache_dir,
         revision=model_args.model_revision,
-        use_auth_token=True if model_args.use_auth_token else None,
+        token=model_args.token,
     )
     model = AutoModelForImageClassification.from_pretrained(
         model_args.model_name_or_path,
         from_tf=bool(".ckpt" in model_args.model_name_or_path),
        config=config,
         cache_dir=model_args.cache_dir,
         revision=model_args.model_revision,
-        use_auth_token=True if model_args.use_auth_token else None,
+        token=model_args.token,
         ignore_mismatched_sizes=model_args.ignore_mismatched_sizes,
     )
 
@@ -311,7 +309,7 @@ def compute_metrics(p):
         model_args.feature_extractor_name or model_args.model_name_or_path,
         cache_dir=model_args.cache_dir,
         revision=model_args.model_revision,
-        use_auth_token=True if model_args.use_auth_token else None,
+        token=model_args.token,
     )
 
     # Define torchvision transforms to be applied to each image.
```
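The practical effect of the `use_auth_token` to `token` migration is that a string (or `None`) flows straight through to `from_pretrained` instead of a boolean flag. A minimal standalone sketch, assuming a hypothetical private checkpoint and a placeholder token value:

```python
# Sketch of the new calling convention; both names below are placeholders.
from transformers import AutoModelForImageClassification

model = AutoModelForImageClassification.from_pretrained(
    "your-org/private-vit",  # hypothetical private checkpoint
    token="hf_xxx",  # with token=None, the cached `huggingface-cli login` token is used
)
```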

optimum/exporters/ipex/__init__.py

Whitespace-only changes.

+91 (new file, shown in full below)
```python
# Copyright 2024 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from transformers.models.llama.modeling_llama import (
    LlamaAttention,
    LlamaDecoderLayer,
    LlamaForCausalLM,
    LlamaModel,
    LlamaRMSNorm,
)

from optimum.intel.utils.import_utils import is_ipex_version

from .modeling_utils import (
    _IPEXLlamaDecoderLayerRef,
    _llama_attn_forward,
    _llama_layer_norm_forward,
    _llama_model_forward,
)


_IPEX_EXPORTED_ARCH = ("LlamaForCausalLM",)
_IPEX_EXPORTED_TASK = ("text-generation",)


def convert_func(m, func_name, new_function):
    # Bind `new_function` to the instance `m` so it replaces the method `func_name`.
    bound_method = new_function.__get__(m, m.__class__)
    setattr(m, func_name, bound_method)


def convert_functions(m, target_m, new_function_name, new_function):
    # Recursively walk submodules and rebind the method on every instance of `target_m`.
    for _, sub_m in m.named_children():
        if isinstance(sub_m, target_m):
            convert_func(sub_m, new_function_name, new_function)
        convert_functions(sub_m, target_m, new_function_name, new_function)


def convert_class(m, target_m, new_class, config, distributed=False):
    # Recursively replace every instance of `target_m` with a wrapper built by `new_class`.
    for name, sub_m in m.named_children():
        if isinstance(sub_m, target_m):
            new_m = new_class(sub_m, config, distributed)
            setattr(m, name, new_m)
        convert_class(sub_m, target_m, new_class, config, distributed)


def patch_op(m, target_m, new_op_name, new_op):
    # Recursively attach `new_op` as an attribute on every instance of `target_m`.
    for name, sub_m in m.named_children():
        if isinstance(sub_m, target_m):
            setattr(sub_m, new_op_name, new_op)
        patch_op(sub_m, target_m, new_op_name, new_op)


def _patch_llama_model(model):
    # Note: the original hunk checked `< "2.5.0"` while the message said "> 2.3.0";
    # the check and message are aligned to 2.3.0 here.
    if is_ipex_version("<", "2.3.0"):
        raise ImportError("Only ipex version >= 2.3.0 supports RotaryEmbedding and IndirectAccessKVCache")

    from intel_extension_for_pytorch.llm.modules import IndirectAccessKVCache, RotaryEmbedding

    ipex_rope = RotaryEmbedding(
        model.config.max_position_embeddings,
        model.config.hidden_size // model.config.num_attention_heads,
        model.config.rope_theta,
        model.config.architectures[0],
    )
    ipex_scale_dot_product = IndirectAccessKVCache(text_max_length=model.config.max_position_embeddings)
    patch_op(model, LlamaAttention, "ipex_rope", ipex_rope)
    patch_op(model, LlamaAttention, "ipex_scale_dot_product", ipex_scale_dot_product)

    convert_functions(model, LlamaModel, "forward", _llama_model_forward)
    convert_functions(model, LlamaAttention, "forward", _llama_attn_forward)
    convert_functions(model, LlamaRMSNorm, "forward", _llama_layer_norm_forward)

    convert_class(model, LlamaDecoderLayer, _IPEXLlamaDecoderLayerRef, model.config)
    return model


def _patch_model(model):
    if isinstance(model, LlamaForCausalLM):
        model = _patch_llama_model(model)
    return model
```
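For orientation, here is a minimal sketch of how this patcher would be driven, assuming an IPEX build that passes the version check; the checkpoint name is a placeholder, not something this commit prescribes:

```python
# Hypothetical usage of the patcher above.
import torch
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf", torch_dtype=torch.bfloat16)
model = _patch_model(model)  # LlamaForCausalLM gets IPEX-patched forwards and decoder layers
```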
