
Commit 2e6d25b

add

1 parent eaf327c

File tree

1 file changed: +15 -15 lines

notebooks/openvino/quantized_generation_demo.ipynb

@@ -75,16 +75,16 @@
     "metadata": {},
     "outputs": [],
     "source": [
-    "model_name = 'microsoft/phi-2'\n",
-    "save_name = model_name.split(\"/\")[-1] + '_openvino'\n",
-    "precision = 'f32'\n",
+    "model_name = \"microsoft/phi-2\"\n",
+    "save_name = model_name.split(\"/\")[-1] + \"_openvino\"\n",
+    "precision = \"f32\"\n",
     "quantization_config = OVWeightQuantizationConfig(\n",
     "    bits=4,\n",
     "    sym=False,\n",
     "    group_size=128,\n",
     "    ratio=0.8,\n",
     ")\n",
-    "device = 'gpu'"
+    "device = \"gpu\""
     ]
    },
    {
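For context, the cell this hunk touches configures 4-bit weight quantization for phi-2. A minimal sketch of how such a config is typically consumed in optimum-intel (the export call below is illustrative, not the notebook's exact cell):

    from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig

    # 4-bit asymmetric weight quantization: scales are computed per group of
    # 128 weights, and ratio=0.8 quantizes ~80% of the weights to 4-bit while
    # keeping the rest in 8-bit for accuracy.
    quantization_config = OVWeightQuantizationConfig(bits=4, sym=False, group_size=128, ratio=0.8)

    model = OVModelForCausalLM.from_pretrained(
        "microsoft/phi-2",
        export=True,  # convert the checkpoint to OpenVINO IR on the fly
        quantization_config=quantization_config,
    )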
@@ -114,14 +114,14 @@
     "source": [
     "# Load kwargs\n",
     "load_kwargs = {\n",
-    "    'device': device,\n",
-    "    'ov_config': {\n",
+    "    \"device\": device,\n",
+    "    \"ov_config\": {\n",
     "        \"PERFORMANCE_HINT\": \"LATENCY\",\n",
     "        \"INFERENCE_PRECISION_HINT\": precision,\n",
     "        \"CACHE_DIR\": os.path.join(save_name, \"model_cache\"), # OpenVINO will use this directory as cache\n",
     "    },\n",
-    "    'compile': False,\n",
-    "    'quantization_config': quantization_config\n",
+    "    \"compile\": False,\n",
+    "    \"quantization_config\": quantization_config\n",
     "}\n",
     "\n",
     "# Check whether the model was already exported\n",
@@ -143,7 +143,7 @@
     "\n",
     "# TODO Optional: export to huggingface/hub\n",
     "\n",
-    "model_size = os.stat(os.path.join(save_name, 'openvino_model.bin')).st_size / 1024 ** 3\n",
+    "model_size = os.stat(os.path.join(save_name, \"openvino_model.bin\")).st_size / 1024 ** 3\n",
     "print(f'Model size in FP32: ~5.4GB, current model size in 4bit: {model_size:.2f}GB')"
     ]
    },
@@ -312,12 +312,12 @@
     "    for idx, (user_msg, model_msg) in enumerate(history):\n",
     "        # skip the last assistant message if it's empty, the tokenizer will do the formatting\n",
     "        if idx == len(history) - 1 and not model_msg:\n",
-    "            messages.append({'role': 'User', 'content': user_msg})\n",
+    "            messages.append({\"role\": \"User\", \"content\": user_msg})\n",
     "            break\n",
     "        if user_msg:\n",
-    "            messages.append({'role': 'User', 'content': user_msg})\n",
+    "            messages.append({\"role\": \"User\", \"content\": user_msg})\n",
     "        if model_msg:\n",
-    "            messages.append({'role': 'Assistant', 'content': model_msg})\n",
+    "            messages.append({\"role\": \"Assistant\", \"content\": model_msg})\n",
     "    input_token = tokenizer.apply_chat_template(\n",
     "        messages,\n",
     "        add_generation_prompt=True,\n",
@@ -356,7 +356,7 @@
     "\n",
     "    prompt_char = '▌'\n",
     "    history[-1][1] = prompt_char\n",
-    "    yield (history, 'Status: Generating...')\n",
+    "    yield (history, \"Status: Generating...\")\n",
     "    \n",
     "    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)\n",
     "\n",
@@ -394,7 +394,7 @@
     "            break\n",
     "        elif is_partial_stop(partial_text, stop_str):\n",
     "            continue\n",
-    "        yield (history, 'Status: Generating...')\n",
+    "        yield (history, \"Status: Generating...\")\n",
     "    history[-1][1] = partial_text\n",
     "    generation_time = time.perf_counter() - start\n",
     "    yield (history, f'Generation time: {generation_time:.2f} sec')"
@@ -519,7 +519,7 @@
     "        queue=True\n",
     "    )\n",
     "    \n",
-    "    clear.click(fn=lambda: (None, 'Status: Idle'), inputs=None, outputs=[chatbot, status], queue=False)"
+    "    clear.click(fn=lambda: (None, \"Status: Idle\"), inputs=None, outputs=[chatbot, status], queue=False)"
     ]
    },
    {
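The lambda wired to the Clear button returns one value per output component: None empties the Chatbot and the string resets the status label. A minimal self-contained sketch (component names mirror the notebook's; the surrounding layout is assumed):

    import gradio as gr

    with gr.Blocks() as demo:
        chatbot = gr.Chatbot()
        status = gr.Textbox(value="Status: Idle", interactive=False)
        clear = gr.Button("Clear")
        # fn returns (None, "Status: Idle"): one value per component in outputs.
        clear.click(fn=lambda: (None, "Status: Idle"), inputs=None, outputs=[chatbot, status], queue=False)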
