
Commit 1fd464b

fix merge conflict

Signed-off-by: Liu, Kaixuan <kaixuan.liu@intel.com>

1 parent b4ba6d0 · commit 1fd464b

10 files changed (+234, -177 lines)

notebooks/ipex/text_generation.ipynb (+1)
@@ -22,6 +22,7 @@
 "source": [
 "import torch\n",
 "from transformers import AutoTokenizer\n",
+"\n",
 "from optimum.intel.ipex import IPEXModelForCausalLM"
 ]
 },

notebooks/openvino/optimum_openvino_inference.ipynb (+37, -12)
@@ -78,6 +78,7 @@
 "source": [
 "from optimum.intel import OVModelForQuestionAnswering\n",
 "\n",
+"\n",
 "# Load PyTorch model from the Hub and export to OpenVINO in the background\n",
 "model = OVModelForQuestionAnswering.from_pretrained(\"distilbert-base-uncased-distilled-squad\", export=True)\n",
 "\n",
@@ -122,6 +123,7 @@
 "source": [
 "from transformers import AutoTokenizer\n",
 "\n",
+"\n",
 "tokenizer = AutoTokenizer.from_pretrained(\"distilbert-base-uncased-distilled-squad\")\n",
 "tokenizer.save_pretrained(\"distilbert-base-uncased-distilled-squad-ov-fp32\")"
 ]
@@ -182,9 +184,11 @@
 }
 ],
 "source": [
-"from optimum.intel import OVModelForQuestionAnswering\n",
 "from transformers import AutoTokenizer, pipeline\n",
 "\n",
+"from optimum.intel import OVModelForQuestionAnswering\n",
+"\n",
+"\n",
 "model = OVModelForQuestionAnswering.from_pretrained(\"distilbert-base-uncased-distilled-squad-ov-fp32\")\n",
 "tokenizer = AutoTokenizer.from_pretrained(\"distilbert-base-uncased-distilled-squad\")\n",
 "ov_pipe = pipeline(\"question-answering\", model=model, tokenizer=tokenizer)\n",
@@ -240,9 +244,11 @@
 ],
 "source": [
 "import torch\n",
-"from optimum.intel import OVModelForQuestionAnswering\n",
 "from transformers import AutoTokenizer, pipeline\n",
 "\n",
+"from optimum.intel import OVModelForQuestionAnswering\n",
+"\n",
+"\n",
 "model = OVModelForQuestionAnswering.from_pretrained(\"distilbert-base-uncased-distilled-squad-ov-fp32\")\n",
 "tokenizer = AutoTokenizer.from_pretrained(\"distilbert-base-uncased-distilled-squad-ov-fp32\")\n",
 "\n",
@@ -324,9 +330,11 @@
 }
 ],
 "source": [
-"from optimum.intel import OVModelForQuestionAnswering\n",
 "from transformers import AutoTokenizer, pipeline\n",
 "\n",
+"from optimum.intel import OVModelForQuestionAnswering\n",
+"\n",
+"\n",
 "model = OVModelForQuestionAnswering.from_pretrained(\n",
 " \"helenai/distilbert-base-uncased-distilled-squad-ov-fp32\", compile=False\n",
 ")\n",
@@ -411,6 +419,7 @@
 "source": [
 "from openvino.runtime import Core\n",
 "\n",
+"\n",
 "for device in Core().available_devices:\n",
 " print(device, Core().get_property(device, \"FULL_DEVICE_NAME\"))"
 ]
@@ -528,10 +537,12 @@
 }
 ],
 "source": [
+"from datasets import load_dataset\n",
 "from IPython.display import Audio\n",
-"from optimum.intel import OVModelForAudioClassification\n",
 "from transformers import AutoFeatureExtractor, pipeline\n",
-"from datasets import load_dataset\n",
+"\n",
+"from optimum.intel import OVModelForAudioClassification\n",
+"\n",
 "\n",
 "model_id = \"helenai/MIT-ast-finetuned-speech-commands-v2-ov\"\n",
 "model = OVModelForAudioClassification.from_pretrained(model_id)\n",
@@ -638,9 +649,11 @@
 }
 ],
 "source": [
-"from optimum.intel import OVModelForCausalLM\n",
 "from transformers import AutoTokenizer, pipeline\n",
 "\n",
+"from optimum.intel import OVModelForCausalLM\n",
+"\n",
+"\n",
 "model_id = \"helenai/gpt2-ov\"\n",
 "tokenizer = AutoTokenizer.from_pretrained(model_id)\n",
 "model = OVModelForCausalLM.from_pretrained(model_id)\n",
@@ -704,9 +717,11 @@
 ],
 "source": [
 "from IPython.display import Image\n",
-"from optimum.intel import OVModelForImageClassification\n",
 "from transformers import AutoImageProcessor, pipeline\n",
 "\n",
+"from optimum.intel import OVModelForImageClassification\n",
+"\n",
+"\n",
 "model_id = \"helenai/microsoft-swin-tiny-patch4-window7-224-ov\"\n",
 "model = OVModelForImageClassification.from_pretrained(model_id, compile=False)\n",
 "image_processor = AutoImageProcessor.from_pretrained(model_id)\n",
@@ -766,9 +781,11 @@
 }
 ],
 "source": [
-"from optimum.intel import OVModelForMaskedLM\n",
 "from transformers import AutoTokenizer, pipeline\n",
 "\n",
+"from optimum.intel import OVModelForMaskedLM\n",
+"\n",
+"\n",
 "model_id = \"helenai/bert-base-uncased-ov\"\n",
 "model = OVModelForMaskedLM.from_pretrained(model_id)\n",
 "tokenizer = AutoTokenizer.from_pretrained(model_id)\n",
@@ -835,9 +852,11 @@
 }
 ],
 "source": [
-"from optimum.intel import OVModelForQuestionAnswering\n",
 "from transformers import AutoTokenizer, pipeline\n",
 "\n",
+"from optimum.intel import OVModelForQuestionAnswering\n",
+"\n",
+"\n",
 "# Load the model and tokenizer saved in Part 1 of this notebook. Or use the line below to load them from the hub\n",
 "# model_id = \"helenai/distilbert-base-uncased-distilled-squad-ov-fp32\"\n",
 "model_id = \"distilbert-base-uncased-distilled-squad-ov-fp32\"\n",
@@ -890,9 +909,11 @@
 }
 ],
 "source": [
-"from optimum.intel import OVModelForSeq2SeqLM\n",
 "from transformers import AutoTokenizer, pipeline\n",
 "\n",
+"from optimum.intel import OVModelForSeq2SeqLM\n",
+"\n",
+"\n",
 "model_id = \"helenai/t5-small-ov\"\n",
 "model = OVModelForSeq2SeqLM.from_pretrained(model_id, compile=False, trust_remote_code=True)\n",
 "tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)\n",
@@ -998,9 +1019,11 @@
 }
 ],
 "source": [
-"from optimum.intel import OVModelForSequenceClassification\n",
 "from transformers import AutoTokenizer, pipeline\n",
 "\n",
+"from optimum.intel import OVModelForSequenceClassification\n",
+"\n",
+"\n",
 "model_id = \"helenai/papluca-xlm-roberta-base-language-detection-ov\"\n",
 "model = OVModelForSequenceClassification.from_pretrained(model_id)\n",
 "tokenizer = AutoTokenizer.from_pretrained(model_id)\n",
@@ -1047,9 +1070,11 @@
 }
 ],
 "source": [
-"from optimum.intel import OVModelForTokenClassification\n",
 "from transformers import AutoTokenizer, pipeline\n",
 "\n",
+"from optimum.intel import OVModelForTokenClassification\n",
+"\n",
+"\n",
 "model_id = \"helenai/dslim-bert-base-NER-ov-fp32\"\n",
 "tokenizer = AutoTokenizer.from_pretrained(model_id)\n",
 "model = OVModelForTokenClassification.from_pretrained(model_id)\n",

notebooks/openvino/quantized_generation_demo.ipynb (+16, -11)
@@ -45,6 +45,7 @@
 "import os\n",
 "\n",
 "from transformers import AutoTokenizer\n",
+"\n",
 "from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig"
 ]
 },
@@ -211,6 +212,7 @@
 "source": [
 "from transformers import TextStreamer\n",
 "\n",
+"\n",
 "# Tokenize the sample\n",
 "inputs = tokenizer([sample], return_tensors='pt')\n",
 "\n",
@@ -294,15 +296,15 @@
 "\n",
 "\n",
 "# Tokenize the sample\n",
-"inputs = tokenizer([sample], return_tensors='pt') \n",
+"inputs = tokenizer([sample], return_tensors='pt')\n",
 "\n",
 "out = stateless_model.generate(\n",
 " **inputs,\n",
 " max_new_tokens=128,\n",
 " streamer=TextStreamer(tokenizer=tokenizer, skip_special_tokens=True),\n",
 " pad_token_id=tokenizer.eos_token_id,\n",
 " prompt_lookup_num_tokens=3,\n",
-") "
+")"
 ]
 },
 {
@@ -442,6 +444,7 @@
 "outputs": [],
 "source": [
 "from functools import wraps\n",
+"\n",
 "import numpy as np\n",
 "\n",
 "\n",
@@ -458,15 +461,15 @@
 " if len(self.seq_lens) > 0 or len(self.win_sizes) > 0:\n",
 " raise RuntimeError(\"Always use a new instance, don't reuse!\")\n",
 " self.model_forward = self.model.forward\n",
-" \n",
+"\n",
 " @wraps(self.model_forward)\n",
 " def forward_wrapper(**kwargs):\n",
 " self.seq_lens[-1].append(kwargs.get(\"attention_mask\").shape[-1])\n",
 " self.win_sizes[-1].append(kwargs.get(\"input_ids\").shape[-1] - 1)\n",
 " return self.model_forward(**kwargs)\n",
-" \n",
+"\n",
 " self.model.forward = forward_wrapper\n",
-" \n",
+"\n",
 " # wrap generate method\n",
 " self.model_generate = self.model.generate\n",
 "\n",
@@ -494,7 +497,7 @@
 " self.seq_lens = [sl[1:] for sl in self.seq_lens]\n",
 " # Add window size for output to ease calculation later\n",
 " for ws, sl in zip(self.win_sizes, self.seq_lens):\n",
-" ws.append(0) \n",
+" ws.append(0)\n",
 "\n",
 " def acceptance_rate(self, return_mean=True, normalize=False):\n",
 " # ar_per_win = ((cur_seq_len - cur_win_size) - (prev_seq_len - prev_win_size) - 1) / prev_win_size\n",
@@ -533,8 +536,9 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from tqdm import tqdm\n",
 "from datasets import load_dataset\n",
+"from tqdm import tqdm\n",
+"\n",
 "\n",
 "dataset_name = \"openai_humaneval\"\n",
 "dataset_subset_name = None\n",
@@ -590,10 +594,10 @@
 "from threading import Thread\n",
 "\n",
 "from transformers import (\n",
-" TextIteratorStreamer,\n",
+" GenerationConfig,\n",
 " StoppingCriteria,\n",
 " StoppingCriteriaList,\n",
-" GenerationConfig,\n",
+" TextIteratorStreamer,\n",
 ")\n",
 "\n",
 "\n",
@@ -690,7 +694,7 @@
 " prompt_char = \"\"\n",
 " history[-1][1] = prompt_char\n",
 " yield history, \"Status: Generating...\", *([gr.update(interactive=False)] * 4)\n",
-" \n",
+"\n",
 " streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)\n",
 "\n",
 " # Create a stopping criteria to prevent the model from playing the role of the user aswell.\n",
@@ -770,6 +774,7 @@
 "source": [
 "import gradio as gr\n",
 "\n",
+"\n",
 "try:\n",
 " demo.close()\n",
 "except:\n",
@@ -808,7 +813,7 @@
 " history: conversation history\n",
 " Returns:\n",
 " updated history\n",
-" \"\"\" \n",
+" \"\"\"\n",
 " history[-1][1] = None\n",
 " return history\n",
 "\n",

notebooks/openvino/question_answering_quantization.ipynb (+3, -1)
@@ -51,9 +51,11 @@
 "import transformers\n",
 "from evaluate import evaluator\n",
 "from openvino.runtime import Core\n",
-"from optimum.intel import OVModelForQuestionAnswering, OVQuantizer, OVQuantizationConfig, OVConfig\n",
 "from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline\n",
 "\n",
+"from optimum.intel import OVConfig, OVModelForQuestionAnswering, OVQuantizationConfig, OVQuantizer\n",
+"\n",
+"\n",
 "transformers.logging.set_verbosity_error()\n",
 "datasets.logging.set_verbosity_error()"
 ]

notebooks/openvino/stable_diffusion_hybrid_quantization.ipynb (+4, -1)
@@ -46,15 +46,18 @@
 "outputs": [],
 "source": [
 "import time\n",
+"from pathlib import Path\n",
+"\n",
 "import datasets\n",
 "import matplotlib.pyplot as plt\n",
 "import numpy as np\n",
 "import transformers\n",
-"from pathlib import Path\n",
 "from openvino.runtime import Core\n",
+"\n",
 "from optimum.intel import OVConfig, OVQuantizer, OVStableDiffusionPipeline, OVWeightQuantizationConfig\n",
 "from optimum.intel.openvino.configuration import OVQuantizationMethod\n",
 "\n",
+"\n",
 "transformers.logging.set_verbosity_error()\n",
 "datasets.logging.set_verbosity_error()"
 ]

optimum/exporters/ipex/model_patcher.py (+2, -2)
@@ -29,11 +29,11 @@
 from .modeling_utils import (
     _IPEX_MINIMUM_VERSION_FOR_PATCHING,
     _gpt2_block_forward,
-    _ipex_rms_layer_norm_forward,
     _IPEXFalconDecoderLayer,
     _IPEXGPT2Attention,
     _IPEXIntermediate,
     _IPEXLlamaDecoderLayer,
+    _llama_layer_norm_forward,
     _llama_model_forward,
 )

@@ -79,7 +79,7 @@ def _patch_llama_model(model):
     2. Linear fusion with (2 Linears + Silu + Mul) and (Linear + Add)
     """
     convert_functions(model, LlamaModel, "forward", _llama_model_forward)
-    convert_functions(model, LlamaRMSNorm, "forward", _ipex_rms_layer_norm_forward)
+    convert_functions(model, LlamaRMSNorm, "forward", _llama_layer_norm_forward)
     convert_class(model, LlamaDecoderLayer, _IPEXLlamaDecoderLayer, model.config)
     return model
8585
