
Commit 3b8900d

Merge branch 'ipex-cpu' into ipex-xpu
2 parents: 872a3eb + d1d0ca0

25 files changed (+2152, -319 lines)

.github/workflows/test_inc.yml (+3, -3)
@@ -32,7 +32,7 @@ jobs:
           python -m pip install --upgrade pip
           pip install cmake
           pip install py-cpuinfo
-          pip install torch==2.2 torchaudio torchvision --extra-index-url https://download.pytorch.org/whl/cpu
+          pip install torch==2.2.2 torchvision==0.17.2 torchaudio==2.2.2 --index-url https://download.pytorch.org/whl/cpu
           pip install .[neural-compressor,diffusers,tests]
           pip install intel-extension-for-transformers
           pip install peft
@@ -43,7 +43,7 @@ jobs:
       - name: Test IPEX
         run: |
           pip uninstall -y intel-extension-for-transformers
-          pip install torch==2.1.0 torchaudio==2.1.0 torchvision==0.16 --extra-index-url https://download.pytorch.org/whl/cpu
-          pip install intel-extension-for-pytorch==2.1.100
+          pip install torch==2.3.0 torchaudio==2.3.0 torchvision==0.18 --extra-index-url https://download.pytorch.org/whl/cpu
+          pip install intel-extension-for-pytorch==2.3.0
           pytest tests/neural_compressor/test_ipex.py


.github/workflows/test_ipex.yml (+1, -1)
@@ -30,7 +30,7 @@ jobs:
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install torch==2.2 torchaudio torchvision --extra-index-url https://download.pytorch.org/whl/cpu
+          pip install torch torchaudio torchvision --extra-index-url https://download.pytorch.org/whl/cpu
           pip install .[ipex,tests]
       - name: Test with Pytest
         run: |

optimum/commands/export/openvino.py (+28, -3)
@@ -119,6 +119,15 @@ def parse_args_openvino(parser: "ArgumentParser"):
             "or ['conceptual_captions','laion/220k-GPT4Vision-captions-from-LIVIS','laion/filtered-wit'] for diffusion models."
         ),
     )
+    optional_group.add_argument(
+        "--all-layers",
+        action="store_true",
+        default=None,
+        help=(
+            "Whether embeddings and last MatMul layers should be compressed to INT4. If not provided, they are "
+            "compressed to INT8 during weight compression."
+        ),
+    )
     optional_group.add_argument(
         "--disable-stateful",
         action="store_true",
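Worth noting: `action="store_true"` combined with `default=None` makes `--all-layers` tri-state, so downstream code can tell "flag never passed" (None) apart from an explicit opt-in (True); the default-config check in the next hunk relies on exactly that. A minimal standalone sketch using only the standard library (names ours):

import argparse

# Tri-state CLI flag: None when omitted, True when passed;
# there is no False state reachable from the command line.
parser = argparse.ArgumentParser()
parser.add_argument("--all-layers", action="store_true", default=None)

assert parser.parse_args([]).all_layers is None             # omitted
assert parser.parse_args(["--all-layers"]).all_layers is True  # passed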
@@ -198,6 +207,7 @@ def run(self):
                 and self.args.ratio is None
                 and self.args.group_size is None
                 and self.args.sym is None
+                and self.args.all_layers is None
                 and self.args.model in _DEFAULT_4BIT_CONFIGS
             ):
                 quantization_config = _DEFAULT_4BIT_CONFIGS[self.args.model]
@@ -207,6 +217,7 @@ def run(self):
                 "ratio": 1 if is_int8 else (self.args.ratio or 0.8),
                 "sym": self.args.sym or False,
                 "group_size": -1 if is_int8 else self.args.group_size,
+                "all_layers": None if is_int8 else self.args.all_layers,
             }

         if self.args.weight_format in {"int4_sym_g128", "int4_asym_g128", "int4_sym_g64", "int4_asym_g64"}:
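Worked through by hand (a sketch covering only the keys visible in this hunk; `is_int8` presumably reflects `--weight-format int8`), the mapping above gives:

# --weight-format int8, no other flags passed:
quantization_config = {"ratio": 1, "sym": False, "group_size": -1, "all_layers": None}

# an int4 weight format with no explicit --ratio/--sym/--group-size/--all-layers:
quantization_config = {"ratio": 0.8, "sym": False, "group_size": None, "all_layers": None}

For int4 the group size is left as None at this point; the `int4_*_g64` / `int4_*_g128` branch that follows presumably fills it in from the format name.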
@@ -226,6 +237,9 @@ def run(self):
             )
             library_name = "transformers"

+        if self.args.convert_tokenizer:
+            logger.warning("`--convert-tokenizer` option is deprecated. Tokenizer will be converted by default.")
+
         if (
             library_name == "diffusers"
             and ov_config
and ov_config
@@ -261,10 +275,21 @@ def run(self):
261275
)
262276
model.save_pretrained(self.args.output)
263277

264-
else:
265-
if self.args.convert_tokenizer:
266-
logger.warning("`--convert-tokenizer` option is deprecated. Tokenizer will be converted by default.")
278+
if self.args.disable_convert_tokenizer:
279+
return
280+
281+
# avoid import when using other exporters (IPEX, INC)
282+
from ...exporters.openvino.convert import export_tokenizer
267283

284+
output = Path(self.args.output)
285+
tokenizer = getattr(model, "tokenizer", None)
286+
if tokenizer is not None:
287+
export_tokenizer(tokenizer, output / "tokenizer")
288+
289+
tokenizer_2 = getattr(model, "tokenizer_2", None)
290+
if tokenizer_2 is not None:
291+
export_tokenizer(tokenizer_2, output / "tokenizer_2")
292+
else:
268293
# TODO : add input shapes
269294
main_export(
270295
model_name_or_path=self.args.model,
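The tokenizer block above reduces to: unless `--disable-convert-tokenizer` is passed, export whichever of `tokenizer` / `tokenizer_2` the pipeline carries (some diffusion pipelines, e.g. SDXL, have two). A self-contained sketch of that pattern, with `export_tokenizer` injected as a parameter rather than imported (our simplification, not the command's actual signature):

from pathlib import Path

def export_attached_tokenizers(model, output_dir, export_tokenizer, disabled=False):
    # Mirrors the flow in the diff: bail out when disabled, otherwise export
    # each tokenizer attribute that exists into its own subfolder.
    if disabled:
        return
    output = Path(output_dir)
    for name in ("tokenizer", "tokenizer_2"):
        tok = getattr(model, name, None)
        if tok is not None:
            export_tokenizer(tok, output / name)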

optimum/exporters/ipex/model_patcher.py (+5, -28)
@@ -13,7 +13,6 @@
 # limitations under the License.

 from transformers.models.llama.modeling_llama import (
-    LlamaAttention,
     LlamaDecoderLayer,
     LlamaForCausalLM,
     LlamaModel,
@@ -24,7 +23,6 @@

 from .modeling_utils import (
     _IPEXLlamaDecoderLayerRef,
-    _llama_attn_forward,
     _llama_layer_norm_forward,
     _llama_model_forward,
 )
@@ -63,34 +61,13 @@ def patch_op(m, target_m, new_op_name, new_op):


 def _patch_llama_model(model):
-
     ipex_version = "2.1.0" if "xpu" in str(model.device) else "2.5.0"
     if is_ipex_version("<", ipex_version):
-        raise ImportError(f"Only ipex version >= {ipex_version} supports RotaryEmbedding and IndirectAccessKVCache")
-
-    if "cpu" in str(model.device):
-        from intel_extension_for_pytorch.llm.modules import RotaryEmbedding
-        from intel_extension_for_pytorch.llm.modules import IndirectAccessKVCache
-
-        ipex_rope = RotaryEmbedding(
-            model.config.max_position_embeddings,
-            model.config.hidden_size // model.config.num_attention_heads,
-            model.config.rope_theta,
-            model.config.architectures[0],
-        )
-        ipex_scale_dot_product = IndirectAccessKVCache(text_max_length=model.config.max_position_embeddings)
-
-        patch_op(model, LlamaAttention, "ipex_rope", ipex_rope)
-        patch_op(model, LlamaAttention, "ipex_scale_dot_product", ipex_scale_dot_product)
-
-        convert_functions(model, LlamaModel, "forward", _llama_model_forward)
-        convert_functions(model, LlamaAttention, "forward", _llama_attn_forward)
-        convert_functions(model, LlamaRMSNorm, "forward", _llama_layer_norm_forward)
-
-        convert_class(model, LlamaDecoderLayer, _IPEXLlamaDecoderLayerRef, model.config)
-    else:
-        convert_class(model, LlamaDecoderLayer, _IPEXLlamaDecoderLayer, model.config)
-        convert_functions(model, LlamaModel, "forward", _llama_model_forward)
+        raise ImportError(f"Only ipex version >= {ipex_version} supports llama model patching")
+
+    convert_functions(model, LlamaModel, "forward", _llama_model_forward)
+    convert_functions(model, LlamaRMSNorm, "forward", _llama_layer_norm_forward)
+    convert_class(model, LlamaDecoderLayer, _IPEXLlamaDecoderLayerRef, model.config)
     return model

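`convert_functions` and `convert_class` are defined elsewhere in model_patcher.py and are not part of this diff; judging from the call sites, `convert_functions` walks the module tree and rebinds a method on every instance of the target class. A rough sketch of that rebinding pattern (our guess at the shape, not the file's actual implementation):

import torch

def convert_functions(model: torch.nn.Module, target_cls, method_name, new_fn):
    # Rebind `method_name` on every submodule that is an instance of
    # `target_cls`, e.g. LlamaModel.forward -> _llama_model_forward.
    # new_fn.__get__(module, ...) binds the plain function as a method.
    for module in model.modules():
        if isinstance(module, target_cls):
            setattr(module, method_name, new_fn.__get__(module, type(module)))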