Fix quantization call in QA notebook

nikita-savelyevv · nikita-savelyevv · commit 0fe3ced3146c · 2024-04-17T16:05:11.000+02:00
diff --git a/notebooks/openvino/question_answering_quantization.ipynb b/notebooks/openvino/question_answering_quantization.ipynb
@@ -51,7 +51,7 @@
     "import transformers\n",
     "from evaluate import evaluator\n",
     "from openvino.runtime import Core\n",
-    "from optimum.intel.openvino import OVModelForQuestionAnswering, OVQuantizer\n",
+    "from optimum.intel.openvino import OVModelForQuestionAnswering, OVQuantizer, OVQuantizationConfig, OVConfig\n",
     "from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline\n",
     "\n",
     "transformers.logging.set_verbosity_error()\n",
@@ -286,11 +286,11 @@
     "**NOTE:** if you notice very low accuracy after post-training quantization, it is likely caused by an overflow issue which affects processors that do not contain VNNI (Vector Neural Network Instruction). NNCF has an `overflow_fix` option to address this. It will effectively use 7-bits for quantizing instead of 8-bits to prevent the overflow. To use this option, modify the code in the next cell to add an explicit quantization configuration, and set `overflow_fix` to `\"enable\"`:\n",
     "\n",
     "```\n",
-    "from optimum.intel.openvino import OVConfig\n",
+    "from optimum.intel.openvino import OVConfig, OVQuantizationConfig\n",
     "\n",
-    "ov_config = OVConfig()\n",
-    "ov_config.compression[\"overflow_fix\"] = \"enable\"\n",
-    "quantizer = OVQuantizer.from_pretrained(model, ov_config=ov_config)\n",
+    "ov_config = OVConfig(quantization_config=OVQuantizationConfig(overflow_fix=\"enable\"))\n",
+    "quantizer = OVQuantizer.from_pretrained(model)\n",
+    "quantizer.quantize(calibration_dataset=train_dataset, save_directory=int8_ptq_model_path, ov_config=ov_config)\n",
     "```\n",
     "\n",
     "For more information, see [Lower Numerical Precision Deep Learning Inference and Training](https://www.intel.com/content/www/us/en/developer/articles/technical/lower-numerical-precision-deep-learning-inference-and-training.html)"
@@ -317,7 +317,8 @@
     "\n",
     "# Quantize the model\n",
     "quantizer = OVQuantizer.from_pretrained(model)\n",
-    "quantizer.quantize(calibration_dataset=train_dataset, save_directory=int8_ptq_model_path)"
+    "ov_config = OVConfig(quantization_config=OVQuantizationConfig())\n",
+    "quantizer.quantize(calibration_dataset=train_dataset, ov_config=ov_config, save_directory=int8_ptq_model_path)"
    ]
   },
   {