Skip to content

Commit 0fe3ced

Browse files
Fix quantization call in QA notebook
1 parent bce36d2 commit 0fe3ced

File tree

1 file changed

+7
-6
lines changed

1 file changed

+7
-6
lines changed

notebooks/openvino/question_answering_quantization.ipynb

+7-6
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@
5151
"import transformers\n",
5252
"from evaluate import evaluator\n",
5353
"from openvino.runtime import Core\n",
54-
"from optimum.intel.openvino import OVModelForQuestionAnswering, OVQuantizer\n",
54+
"from optimum.intel.openvino import OVModelForQuestionAnswering, OVQuantizer, OVQuantizationConfig, OVConfig\n",
5555
"from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline\n",
5656
"\n",
5757
"transformers.logging.set_verbosity_error()\n",
@@ -286,11 +286,11 @@
286286
"**NOTE:** if you notice very low accuracy after post-training quantization, it is likely caused by an overflow issue which affects processors that do not support VNNI (Vector Neural Network Instructions). NNCF has an `overflow_fix` option to address this. It will effectively use 7 bits for quantizing instead of 8 bits to prevent the overflow. To use this option, modify the code in the next cell to add an explicit quantization configuration, and set `overflow_fix` to `\"enable\"`:\n",
287287
"\n",
288288
"```\n",
289-
"from optimum.intel.openvino import OVConfig\n",
289+
"from optimum.intel.openvino import OVConfig, OVQuantizationConfig\n",
290290
"\n",
291-
"ov_config = OVConfig()\n",
292-
"ov_config.compression[\"overflow_fix\"] = \"enable\"\n",
293-
"quantizer = OVQuantizer.from_pretrained(model, ov_config=ov_config)\n",
291+
"ov_config = OVConfig(quantization_config=OVQuantizationConfig(overflow_fix=\"enable\"))\n",
292+
"quantizer = OVQuantizer.from_pretrained(model)\n",
293+
"quantizer.quantize(calibration_dataset=train_dataset, save_directory=int8_ptq_model_path, ov_config=ov_config)\n",
294294
"```\n",
295295
"\n",
296296
"For more information, see [Lower Numerical Precision Deep Learning Inference and Training](https://www.intel.com/content/www/us/en/developer/articles/technical/lower-numerical-precision-deep-learning-inference-and-training.html)"
@@ -317,7 +317,8 @@
317317
"\n",
318318
"# Quantize the model\n",
319319
"quantizer = OVQuantizer.from_pretrained(model)\n",
320-
"quantizer.quantize(calibration_dataset=train_dataset, save_directory=int8_ptq_model_path)"
320+
"ov_config = OVConfig(quantization_config=OVQuantizationConfig())\n",
321+
"quantizer.quantize(calibration_dataset=train_dataset, ov_config=ov_config, save_directory=int8_ptq_model_path)"
321322
]
322323
},
323324
{

0 commit comments

Comments
 (0)