|
51 | 51 | "import transformers\n",
|
52 | 52 | "from evaluate import evaluator\n",
|
53 | 53 | "from openvino.runtime import Core\n",
|
54 |
| - "from optimum.intel.openvino import OVModelForQuestionAnswering, OVQuantizer\n", |
| 54 | + "from optimum.intel.openvino import OVModelForQuestionAnswering, OVQuantizer, OVQuantizationConfig, OVConfig\n", |
55 | 55 | "from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline\n",
|
56 | 56 | "\n",
|
57 | 57 | "transformers.logging.set_verbosity_error()\n",
|
|
286 | 286 | "**NOTE:** if you notice very low accuracy after post-training quantization, it is likely caused by an overflow issue which affects processors that do not contain VNNI (Vector Neural Network Instruction). NNCF has an `overflow_fix` option to address this. It will effectively use 7-bits for quantizing instead of 8-bits to prevent the overflow. To use this option, modify the code in the next cell to add an explicit quantization configuration, and set `overflow_fix` to `\"enable\"`:\n",
|
287 | 287 | "\n",
|
288 | 288 | "```\n",
|
289 |
| - "from optimum.intel.openvino import OVConfig\n", |
| 289 | + "from optimum.intel.openvino import OVConfig, OVQuantizationConfig\n", |
290 | 290 | "\n",
|
291 |
| - "ov_config = OVConfig()\n", |
292 |
| - "ov_config.compression[\"overflow_fix\"] = \"enable\"\n", |
293 |
| - "quantizer = OVQuantizer.from_pretrained(model, ov_config=ov_config)\n", |
| 291 | + "ov_config = OVConfig(quantization_config=OVQuantizationConfig(overflow_fix=\"enable\"))\n", |
| 292 | + "quantizer = OVQuantizer.from_pretrained(model)\n", |
| 293 | + "quantizer.quantize(calibration_dataset=train_dataset, save_directory=int8_ptq_model_path, ov_config=ov_config)\n", |
294 | 294 | "```\n",
|
295 | 295 | "\n",
|
296 | 296 | "For more information, see [Lower Numerical Precision Deep Learning Inference and Training](https://www.intel.com/content/www/us/en/developer/articles/technical/lower-numerical-precision-deep-learning-inference-and-training.html)"
|
|
317 | 317 | "\n",
|
318 | 318 | "# Quantize the model\n",
|
319 | 319 | "quantizer = OVQuantizer.from_pretrained(model)\n",
|
320 |
| - "quantizer.quantize(calibration_dataset=train_dataset, save_directory=int8_ptq_model_path)" |
| 320 | + "ov_config = OVConfig(quantization_config=OVQuantizationConfig())\n", |
| 321 | + "quantizer.quantize(calibration_dataset=train_dataset, ov_config=ov_config, save_directory=int8_ptq_model_path)" |
321 | 322 | ]
|
322 | 323 | },
|
323 | 324 | {
|
|
0 commit comments