Commit ea19b9a

fix model export

1 parent a5f3b26 commit ea19b9a

6 files changed: +67 -67 lines

notebooks/ipex/text_generation.ipynb (+2 -6)

@@ -62,13 +62,9 @@
 "source": [
 "model = IPEXModelForCausalLM.from_pretrained(\"gpt2\", torch_dtype=torch.bfloat16, export=True)\n",
 "tokenizer = AutoTokenizer.from_pretrained(\"gpt2\")\n",
-"input_sentence = [\n",
-" \"Answer the following yes/no question by reasoning step-by-step please. Can you write a whole Haiku in a single tweet?\"\n",
-"]\n",
+"input_sentence = [\"Answer the following yes/no question by reasoning step-by-step please. Can you write a whole Haiku in a single tweet?\"]\n",
 "model_inputs = tokenizer(input_sentence, return_tensors=\"pt\")\n",
-"generation_kwargs = dict(\n",
-" max_new_tokens=32, do_sample=False, num_beams=4, num_beam_groups=1, no_repeat_ngram_size=2, use_cache=True\n",
-")\n",
+"generation_kwargs = dict(max_new_tokens=32, do_sample=False, num_beams=4, num_beam_groups=1, no_repeat_ngram_size=2, use_cache=True)\n",
 "\n",
 "generated_ids = model.generate(**model_inputs, **generation_kwargs)\n",
 "output = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]\n",

notebooks/openvino/optimum_openvino_inference.ipynb (+1 -3)

@@ -466,9 +466,7 @@
 "source": [
 "# Set the device directly with `.from_pretrained()`\n",
 "if \"GPU\" in Core().available_devices:\n",
-" model = OVModelForQuestionAnswering.from_pretrained(\n",
-" \"distilbert-base-uncased-distilled-squad-ov-fp16\", device=\"GPU\"\n",
-" )"
+" model = OVModelForQuestionAnswering.from_pretrained(\"distilbert-base-uncased-distilled-squad-ov-fp16\", device=\"GPU\")"
 ]
 },
 {

notebooks/openvino/quantized_generation_demo.ipynb (+47 -39)

@@ -121,7 +121,7 @@
 " \"CACHE_DIR\": os.path.join(save_name, \"model_cache\"), # OpenVINO will use this directory as cache\n",
 " },\n",
 " \"compile\": False,\n",
-" \"quantization_config\": quantization_config,\n",
+" \"quantization_config\": quantization_config\n",
 "}\n",
 "\n",
 "# Check whether the model was already exported\n",
@@ -143,8 +143,8 @@
 "\n",
 "# TODO Optional: export to huggingface/hub\n",
 "\n",
-"model_size = os.stat(os.path.join(save_name, \"openvino_model.bin\")).st_size / 1024**3\n",
-"print(f\"Model size in FP32: ~5.4GB, current model size in 4bit: {model_size:.2f}GB\")"
+"model_size = os.stat(os.path.join(save_name, \"openvino_model.bin\")).st_size / 1024 ** 3\n",
+"print(f'Model size in FP32: ~5.4GB, current model size in 4bit: {model_size:.2f}GB')"
 ]
 },
 {
@@ -212,7 +212,7 @@
 "from transformers import TextStreamer\n",
 "\n",
 "# Tokenize the sample\n",
-"inputs = tokenizer([sample], return_tensors=\"pt\")\n",
+"inputs = tokenizer([sample], return_tensors='pt')\n",
 "\n",
 "# Call generate on the inputs\n",
 "out = model.generate(\n",
@@ -294,15 +294,15 @@
 "\n",
 "\n",
 "# Tokenize the sample\n",
-"inputs = tokenizer([sample], return_tensors=\"pt\")\n",
+"inputs = tokenizer([sample], return_tensors='pt') \n",
 "\n",
 "out = stateless_model.generate(\n",
 " **inputs,\n",
 " max_new_tokens=128,\n",
 " streamer=TextStreamer(tokenizer=tokenizer, skip_special_tokens=True),\n",
 " pad_token_id=tokenizer.eos_token_id,\n",
 " prompt_lookup_num_tokens=3,\n",
-")"
+") "
 ]
 },
 {
@@ -358,7 +358,7 @@
 " \"CACHE_DIR\": os.path.join(save_name, \"model_cache\"), # OpenVINO will use this directory as cache\n",
 " },\n",
 " \"compile\": False,\n",
-" \"quantization_config\": quantization_config,\n",
+" \"quantization_config\": quantization_config\n",
 "}\n",
 "\n",
 "# Check whether the model was already exported\n",
@@ -458,15 +458,15 @@
 " if len(self.seq_lens) > 0 or len(self.win_sizes) > 0:\n",
 " raise RuntimeError(\"Always use a new instance, don't reuse!\")\n",
 " self.model_forward = self.model.forward\n",
-"\n",
+" \n",
 " @wraps(self.model_forward)\n",
 " def forward_wrapper(**kwargs):\n",
 " self.seq_lens[-1].append(kwargs.get(\"attention_mask\").shape[-1])\n",
 " self.win_sizes[-1].append(kwargs.get(\"input_ids\").shape[-1] - 1)\n",
 " return self.model_forward(**kwargs)\n",
-"\n",
+" \n",
 " self.model.forward = forward_wrapper\n",
-"\n",
+" \n",
 " # wrap generate method\n",
 " self.model_generate = self.model.generate\n",
 "\n",
@@ -479,11 +479,10 @@
 " out = self.model_generate(*args, **kwargs)\n",
 " self.seq_lens[-1].append(out.shape[-1])\n",
 " return out\n",
-"\n",
 " self.model.generate = generate_wrapper\n",
 " return self\n",
 "\n",
-" def __exit__(self, type, value, traceback):\n",
+" def __exit__(self, type, value, traceback):\n",
 " self.model.forward = self.model_forward\n",
 " self.model.generate = self.model_generate\n",
 " self.model_forward = None\n",
@@ -495,7 +494,7 @@
 " self.seq_lens = [sl[1:] for sl in self.seq_lens]\n",
 " # Add window size for output to ease calculation later\n",
 " for ws, sl in zip(self.win_sizes, self.seq_lens):\n",
-" ws.append(0)\n",
+" ws.append(0) \n",
 "\n",
 " def acceptance_rate(self, return_mean=True, normalize=False):\n",
 " # ar_per_win = ((cur_seq_len - cur_win_size) - (prev_seq_len - prev_win_size) - 1) / prev_win_size\n",
@@ -504,8 +503,9 @@
 " sl = np.array(sl, dtype=np.float64)\n",
 " ws = np.array(ws, dtype=np.float64)\n",
 " out_lens = sl - ws\n",
-" accepted = out_lens[1:] - out_lens[:-1] - 1\n",
-" ar_per_win.append(np.divide(accepted, ws[:-1], out=np.zeros_like(accepted), where=ws[:-1] != 0))\n",
+" accepted = (out_lens[1:] - out_lens[:-1] - 1)\n",
+" ar_per_win.append(np.divide(accepted, ws[:-1],\n",
+" out=np.zeros_like(accepted),where=ws[:-1] != 0))\n",
 " ar_per_win = np.hstack(ar_per_win)\n",
 " # Normalized AR doesn't take into account windows with size 0\n",
 " if normalize:\n",
@@ -544,7 +544,7 @@
 "samples_number = 30\n",
 "with AcceptanceRateRecorder(stateless_model) as ar_recorder:\n",
 " for text in tqdm(dataset[:samples_number]):\n",
-" tokenized_prompt = tokenizer([prompt_template.format(text=text)], return_tensors=\"pt\")\n",
+" tokenized_prompt = tokenizer([prompt_template.format(text=text)], return_tensors='pt')\n",
 " stateless_model.generate(\n",
 " **tokenized_prompt,\n",
 " max_new_tokens=128,\n",
@@ -623,6 +623,7 @@
 " return False\n",
 "\n",
 "\n",
+"\n",
 "# Set the chat template to the tokenizer. The chat template implements the simple template of\n",
 "# User: content\n",
 "# Assistant: content\n",
@@ -650,7 +651,11 @@
 " if model_msg:\n",
 " messages.append({\"role\": \"Assistant\", \"content\": model_msg})\n",
 " input_token = tokenizer.apply_chat_template(\n",
-" messages, add_generation_prompt=True, tokenize=True, return_tensors=\"pt\", return_dict=True\n",
+" messages,\n",
+" add_generation_prompt=True,\n",
+" tokenize=True,\n",
+" return_tensors=\"pt\",\n",
+" return_dict=True\n",
 " )\n",
 " return input_token\n",
 "\n",
@@ -674,18 +679,18 @@
 " # Construct the input message string for the model by concatenating the current system message and conversation history\n",
 " # Tokenize the messages string\n",
 " inputs = prepare_history_for_model(history)\n",
-" input_length = inputs[\"input_ids\"].shape[1]\n",
+" input_length = inputs['input_ids'].shape[1]\n",
 " # truncate input in case it is too long.\n",
 " # TODO improve this\n",
 " if input_length > 2000:\n",
 " history = [history[-1]]\n",
 " inputs = prepare_history_for_model(history)\n",
-" input_length = inputs[\"input_ids\"].shape[1]\n",
+" input_length = inputs['input_ids'].shape[1]\n",
 "\n",
 " prompt_char = \"\"\n",
 " history[-1][1] = prompt_char\n",
 " yield history, \"Status: Generating...\", *([gr.update(interactive=False)] * 4)\n",
-"\n",
+" \n",
 " streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)\n",
 "\n",
 " # Create a stopping criteria to prevent the model from playing the role of the user aswell.\n",
@@ -701,14 +706,11 @@
 " eos_token_id=[tokenizer.eos_token_id],\n",
 " pad_token_id=tokenizer.eos_token_id,\n",
 " )\n",
-" generate_kwargs = (\n",
-" dict(\n",
-" streamer=streamer,\n",
-" generation_config=generation_config,\n",
-" stopping_criteria=stopping_criteria,\n",
-" )\n",
-" | inputs\n",
-" )\n",
+" generate_kwargs = dict(\n",
+" streamer=streamer,\n",
+" generation_config=generation_config,\n",
+" stopping_criteria=stopping_criteria,\n",
+" ) | inputs\n",
 "\n",
 " if assisted:\n",
 " target_generate = stateless_model.generate\n",
@@ -735,7 +737,7 @@
 " yield history, \"Status: Generating...\", *([gr.update(interactive=False)] * 4)\n",
 " history[-1][1] = partial_text\n",
 " generation_time = time.perf_counter() - start\n",
-" yield history, f\"Generation time: {generation_time:.2f} sec\", *([gr.update(interactive=True)] * 4)"
+" yield history, f'Generation time: {generation_time:.2f} sec', *([gr.update(interactive=True)] * 4)"
 ]
 },
 {
@@ -779,9 +781,7 @@
 " [\"Can you explain to me briefly what is Python programming language?\"],\n",
 " [\"Explain the plot of Cinderella in a sentence.\"],\n",
 " [\"Write a Python function to perform binary search over a sorted list. Use markdown to write code\"],\n",
-" [\n",
-" \"Lily has a rubber ball that she drops from the top of a wall. The wall is 2 meters tall. How long will it take for the ball to reach the ground?\"\n",
-" ],\n",
+" [\"Lily has a rubber ball that she drops from the top of a wall. The wall is 2 meters tall. How long will it take for the ball to reach the ground?\"],\n",
 "]\n",
 "\n",
 "\n",
@@ -797,7 +797,7 @@
 " \"\"\"\n",
 " # Append current user message to history with a blank assistant message which will be generated by the model\n",
 " history.append([message, None])\n",
-" return (\"\", history)\n",
+" return ('', history)\n",
 "\n",
 "\n",
 "def prepare_for_regenerate(history):\n",
@@ -808,7 +808,7 @@
 " history: conversation history\n",
 " Returns:\n",
 " updated history\n",
-" \"\"\"\n",
+" \"\"\" \n",
 " history[-1][1] = None\n",
 " return history\n",
 "\n",
@@ -821,7 +821,7 @@
 " msg = gr.Textbox(placeholder=\"Enter message here...\", show_label=False, autofocus=True, scale=75)\n",
 " status = gr.Textbox(\"Status: Idle\", show_label=False, max_lines=1, scale=15)\n",
 " with gr.Row():\n",
-" submit = gr.Button(\"Submit\", variant=\"primary\")\n",
+" submit = gr.Button(\"Submit\", variant='primary')\n",
 " regenerate = gr.Button(\"Regenerate\")\n",
 " clear = gr.Button(\"Clear\")\n",
 " with gr.Accordion(\"Advanced Options:\", open=False):\n",
@@ -860,7 +860,9 @@
 " step=0.1,\n",
 " interactive=True,\n",
 " )\n",
-" gr.Examples(EXAMPLES, inputs=msg, label=\"Click on any example and press the 'Submit' button\")\n",
+" gr.Examples(\n",
+" EXAMPLES, inputs=msg, label=\"Click on any example and press the 'Submit' button\"\n",
+" )\n",
 "\n",
 " # Sets generate function to be triggered when the user submit a new message\n",
 " gr.on(\n",
@@ -874,14 +876,20 @@
 " inputs=[chatbot, temperature, max_new_tokens, top_p, repetition_penalty, assisted],\n",
 " outputs=[chatbot, status, msg, submit, regenerate, clear],\n",
 " concurrency_limit=1,\n",
-" queue=True,\n",
+" queue=True\n",
 " )\n",
-" regenerate.click(fn=prepare_for_regenerate, inputs=chatbot, outputs=chatbot, queue=True, concurrency_limit=1).then(\n",
+" regenerate.click(\n",
+" fn=prepare_for_regenerate,\n",
+" inputs=chatbot,\n",
+" outputs=chatbot,\n",
+" queue=True,\n",
+" concurrency_limit=1\n",
+" ).then(\n",
 " fn=generate,\n",
 " inputs=[chatbot, temperature, max_new_tokens, top_p, repetition_penalty, assisted],\n",
 " outputs=[chatbot, status, msg, submit, regenerate, clear],\n",
 " concurrency_limit=1,\n",
-" queue=True,\n",
+" queue=True\n",
 " )\n",
 " clear.click(fn=lambda: (None, \"Status: Idle\"), inputs=None, outputs=[chatbot, status], queue=False)"
 ]

notebooks/openvino/stable_diffusion_hybrid_quantization.ipynb (+11 -14)

@@ -167,7 +167,6 @@
 "def preprocess_fn(example):\n",
 " return {\"prompt\": example[\"caption\"]}\n",
 "\n",
-"\n",
 "NUM_SAMPLES = 200\n",
 "dataset = dataset.take(NUM_SAMPLES)\n",
 "calibration_dataset = dataset.map(lambda x: preprocess_fn(x), remove_columns=dataset.column_names)"
@@ -1067,14 +1066,12 @@
 ],
 "source": [
 "int8_pipe = OVStableDiffusionPipeline.from_pretrained(model_id=MODEL_ID, export=True)\n",
-"quantization_config = OVWeightQuantizationConfig(\n",
-" bits=8, num_samples=NUM_SAMPLES, quant_method=OVQuantizationMethod.HYBRID\n",
-")\n",
+"quantization_config = OVWeightQuantizationConfig(bits=8, num_samples=NUM_SAMPLES, quant_method=OVQuantizationMethod.HYBRID)\n",
 "quantizer = OVQuantizer(int8_pipe)\n",
 "quantizer.quantize(\n",
 " ov_config=OVConfig(quantization_config=quantization_config),\n",
 " calibration_dataset=calibration_dataset,\n",
-" save_directory=int8_model_path,\n",
+" save_directory=int8_model_path\n",
 ")"
 ]
 },
@@ -1205,10 +1202,8 @@
 " im_w, im_h = fp32_img.size\n",
 " is_horizontal = im_h <= im_w\n",
 " figsize = (20, 30) if is_horizontal else (30, 20)\n",
-" fig, axs = plt.subplots(\n",
-" 1 if is_horizontal else 2, 2 if is_horizontal else 1, figsize=figsize, sharex=\"all\", sharey=\"all\"\n",
-" )\n",
-" fig.patch.set_facecolor(\"white\")\n",
+" fig, axs = plt.subplots(1 if is_horizontal else 2, 2 if is_horizontal else 1, figsize=figsize, sharex='all', sharey='all')\n",
+" fig.patch.set_facecolor('white')\n",
 " list_axes = list(axs.flat)\n",
 " for a in list_axes:\n",
 " a.set_xticklabels([])\n",
@@ -1222,7 +1217,7 @@
 " img2_title = \"INT8 result\"\n",
 " list_axes[0].set_title(img1_title, fontsize=20)\n",
 " list_axes[1].set_title(img2_title, fontsize=20)\n",
-" fig.subplots_adjust(wspace=0.0 if is_horizontal else 0.01, hspace=0.01 if is_horizontal else 0.0)\n",
+" fig.subplots_adjust(wspace=0.0 if is_horizontal else 0.01 , hspace=0.01 if is_horizontal else 0.0)\n",
 " fig.tight_layout()"
 ]
 },
@@ -1235,10 +1230,13 @@
 "source": [
 "prompt = \"Self-portrait oil painting, a beautiful cyborg with golden hair, 8k\"\n",
 "\n",
-"\n",
 "def generate_image(pipeline, prompt):\n",
 " transformers.set_seed(1)\n",
-" return pipeline(prompt=prompt, guidance_scale=8.0, output_type=\"pil\").images[0]"
+" return pipeline(\n",
+" prompt=prompt,\n",
+" guidance_scale=8.0,\n",
+" output_type=\"pil\"\n",
+" ).images[0]"
 ]
 },
 {
@@ -1331,7 +1329,7 @@
 "def get_model_size(model_folder, framework):\n",
 " \"\"\"\n",
 " Return OpenVINO or PyTorch model size in Mb.\n",
-"\n",
+" \n",
 " Arguments:\n",
 " model_folder:\n",
 " Directory containing a model.\n",
@@ -1533,7 +1531,6 @@
 "def get_val_dataset(num_items=3):\n",
 " return [item[\"caption\"] for item in dataset.take(num_items)]\n",
 "\n",
-"\n",
 "def benchmark(pipeline, dataset):\n",
 " \"\"\"\n",
 " Benchmark PyTorch or OpenVINO model. This function does inference on `num_items`\n",

optimum/exporters/openvino/convert.py (+1 -1)

@@ -915,7 +915,7 @@ def get_diffusion_models_for_export_ext(
     is_sd3 = False

     if not is_sd3:
-        return get_diffusion_models_for_export(pipeline, int_dtype, float_dtype, exporter)
+        return None, get_diffusion_models_for_export(pipeline, int_dtype, float_dtype, exporter)

     models_for_export = {}

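The functional change behind the commit message is the one-line edit above: for non-SD3 pipelines, get_diffusion_models_for_export_ext now returns a two-element tuple (None plus the result of get_diffusion_models_for_export) instead of the bare result, presumably so callers can unpack a consistent shape regardless of pipeline type. A minimal sketch of why that matters at an unpacking call site follows; the caller name, variable names, and default argument values are illustrative assumptions, not the actual optimum-intel code.

# Illustrative sketch only; names other than get_diffusion_models_for_export_ext are assumed.
from optimum.exporters.openvino.convert import get_diffusion_models_for_export_ext

def export_diffusion_pipeline(pipeline, int_dtype="int64", float_dtype="fp32", exporter="openvino"):
    # With this commit, the non-SD3 branch also returns a 2-tuple, so this unpacking
    # is safe. Before the fix it returned the models dict directly, and unpacking a
    # dict here would either raise ValueError or silently bind the dict's keys
    # instead of the intended values.
    extra_models, models_for_export = get_diffusion_models_for_export_ext(
        pipeline, int_dtype, float_dtype, exporter
    )
    if extra_models is None:
        # Non-SD3 pipeline: only the standard diffusion submodels were collected.
        pass
    return models_for_export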