Commit 673b88b

Update OV quantization docs and QA notebook according to the recent changes (huggingface#671)
* Fix quantization call in QA notebook
* Update OV quantization docs
* Apply PTQ if quantization config was not provided, but calibration dataset was provided
* Add warning

1 parent e6e5ffd
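
To see the updated API end to end, here is a minimal sketch assembled from the snippets changed below. The calibration-dataset arguments (GLUE/SST-2, sample count, split) and the preprocessing function follow the README example this commit touches and are illustrative assumptions, not part of the diff:

```python
from functools import partial

from optimum.intel import OVConfig, OVModelForSequenceClassification, OVQuantizationConfig, OVQuantizer
from transformers import AutoTokenizer

model_id = "distilbert-base-uncased-finetuned-sst-2-english"
model = OVModelForSequenceClassification.from_pretrained(model_id, export=True)
tokenizer = AutoTokenizer.from_pretrained(model_id)

def preprocess_fn(examples, tokenizer):
    return tokenizer(examples["sentence"], padding=True, truncation=True)

quantizer = OVQuantizer.from_pretrained(model)
# Illustrative calibration data; the dataset and arguments are assumptions for this sketch.
calibration_dataset = quantizer.get_calibration_dataset(
    "glue",
    dataset_config_name="sst2",
    preprocess_function=partial(preprocess_fn, tokenizer=tokenizer),
    num_samples=100,
    dataset_split="train",
)

# The key change: full static quantization is now requested explicitly by
# wrapping an OVQuantizationConfig in an OVConfig and passing it to quantize().
ov_config = OVConfig(quantization_config=OVQuantizationConfig())
quantizer.quantize(ov_config=ov_config, calibration_dataset=calibration_dataset, save_directory="nncf_results")
```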

4 files changed: +19 -11 lines


README.md (+3 -2)

````diff
@@ -128,7 +128,7 @@ Post-training static quantization introduces an additional calibration step wher
 
 ```python
 from functools import partial
-from optimum.intel import OVQuantizer, OVModelForSequenceClassification
+from optimum.intel import OVQuantizer, OVModelForSequenceClassification, OVConfig, OVQuantizationConfig
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 
 model_id = "distilbert-base-uncased-finetuned-sst-2-english"
@@ -151,7 +151,8 @@ calibration_dataset = quantizer.get_calibration_dataset(
 # The directory where the quantized model will be saved
 save_dir = "nncf_results"
 # Apply static quantization and save the resulting model in the OpenVINO IR format
-quantizer.quantize(calibration_dataset=calibration_dataset, save_directory=save_dir)
+ov_config = OVConfig(quantization_config=OVQuantizationConfig())
+quantizer.quantize(ov_config=ov_config, calibration_dataset=calibration_dataset, save_directory=save_dir)
 # Load the quantized model
 optimized_model = OVModelForSequenceClassification.from_pretrained(save_dir)
 ```
````

docs/source/optimization_ov.mdx (+3 -2)

````diff
@@ -84,7 +84,7 @@ Here is how to apply static quantization on a fine-tuned DistilBERT given your o
 
 ```python
 from transformers import AutoTokenizer
-from optimum.intel import OVQuantizer, OVModelForSequenceClassification,
+from optimum.intel import OVQuantizer, OVModelForSequenceClassification, OVConfig, OVQuantizationConfig
 
 model_id = "distilbert-base-uncased-finetuned-sst-2-english"
 model = OVModelForSequenceClassification.from_pretrained(model_id, export=True)
@@ -95,7 +95,8 @@ save_dir = "ptq_model"
 quantizer = OVQuantizer.from_pretrained(model)
 
 # Apply static quantization and export the resulting quantized model to OpenVINO IR format
-quantizer.quantize(calibration_dataset=calibration_dataset, save_directory=save_dir)
+ov_config = OVConfig(quantization_config=OVQuantizationConfig())
+quantizer.quantize(ov_config=ov_config, calibration_dataset=calibration_dataset, save_directory=save_dir)
 # Save the tokenizer
 tokenizer.save_pretrained(save_dir)
 ```
````

notebooks/openvino/question_answering_quantization.ipynb (+7 -6)

````diff
@@ -51,7 +51,7 @@
 "import transformers\n",
 "from evaluate import evaluator\n",
 "from openvino.runtime import Core\n",
-"from optimum.intel.openvino import OVModelForQuestionAnswering, OVQuantizer\n",
+"from optimum.intel.openvino import OVModelForQuestionAnswering, OVQuantizer, OVQuantizationConfig, OVConfig\n",
 "from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline\n",
 "\n",
 "transformers.logging.set_verbosity_error()\n",
@@ -286,11 +286,11 @@
 "**NOTE:** if you notice very low accuracy after post-training quantization, it is likely caused by an overflow issue which affects processors that do not contain VNNI (Vector Neural Network Instruction). NNCF has an `overflow_fix` option to address this. It will effectively use 7-bits for quantizing instead of 8-bits to prevent the overflow. To use this option, modify the code in the next cell to add an explicit quantization configuration, and set `overflow_fix` to `\"enable\"`:\n",
 "\n",
 "```\n",
-"from optimum.intel.openvino import OVConfig\n",
+"from optimum.intel.openvino import OVConfig, OVQuantizationConfig\n",
 "\n",
-"ov_config = OVConfig()\n",
-"ov_config.compression[\"overflow_fix\"] = \"enable\"\n",
-"quantizer = OVQuantizer.from_pretrained(model, ov_config=ov_config)\n",
+"ov_config = OVConfig(quantization_config=OVQuantizationConfig(overflow_fix=\"enable\"))\n",
+"quantizer = OVQuantizer.from_pretrained(model)\n",
+"quantizer.quantize(calibration_dataset=train_dataset, save_directory=int8_ptq_model_path, ov_config=ov_config)\n",
 "```\n",
 "\n",
 "For more information, see [Lower Numerical Precision Deep Learning Inference and Training](https://www.intel.com/content/www/us/en/developer/articles/technical/lower-numerical-precision-deep-learning-inference-and-training.html)"
@@ -317,7 +317,8 @@
 "\n",
 "# Quantize the model\n",
 "quantizer = OVQuantizer.from_pretrained(model)\n",
-"quantizer.quantize(calibration_dataset=train_dataset, save_directory=int8_ptq_model_path)"
+"ov_config = OVConfig(quantization_config=OVQuantizationConfig())\n",
+"quantizer.quantize(calibration_dataset=train_dataset, ov_config=ov_config, save_directory=int8_ptq_model_path)"
 ]
 },
 {
````
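
For readability, here is the corrected overflow-fix snippet from the notebook cell above without the JSON escaping (`model`, `train_dataset`, and `int8_ptq_model_path` are defined earlier in the notebook):

```python
from optimum.intel.openvino import OVConfig, OVQuantizationConfig, OVQuantizer

# Enable NNCF's overflow fix: quantize with an effective 7-bit range to avoid
# saturation on processors without VNNI.
ov_config = OVConfig(quantization_config=OVQuantizationConfig(overflow_fix="enable"))
quantizer = OVQuantizer.from_pretrained(model)
quantizer.quantize(calibration_dataset=train_dataset, save_directory=int8_ptq_model_path, ov_config=ov_config)
```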

optimum/intel/openvino/quantization.py (+6 -1)

````diff
@@ -280,13 +280,18 @@ def quantize(
             raise TypeError(f"`ov_config` should be an `OVConfig`, but got: {type(ov_config)} instead.")
         quantization_config = ov_config.quantization_config
         if quantization_config is None:
-            if weights_only is None or weights_only is True:
+            if (weights_only is None or weights_only is True) and calibration_dataset is None:
                 if weights_only is None:
                     logger.info(
                         "`quantization_config` was not provided, 8-bit asymmetric weight quantization will be applied."
                     )
                 ov_config.quantization_config = OVWeightQuantizationConfig(bits=8)
             else:
+                logger.warning(
+                    "`quantization_config` was not provided, but calibration dataset was provided, assuming full "
+                    "model quantization is intended. In the future, please provide `quantization_config` as an "
+                    "instance of OVQuantizationConfig."
+                )
                 ov_config.quantization_config = OVQuantizationConfig()
 
         if isinstance(self.model, OVBaseModel):
````
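
The behavior change in this hunk condenses to the following decision logic, shown here as a simplified stand-alone sketch (an illustration, not the actual method body):

```python
from optimum.intel import OVQuantizationConfig, OVWeightQuantizationConfig

def resolve_quantization_config(quantization_config, weights_only, calibration_dataset):
    """Mirrors the fallback logic in OVQuantizer.quantize() after this commit."""
    if quantization_config is not None:
        # An explicit config always wins.
        return quantization_config
    if (weights_only is None or weights_only is True) and calibration_dataset is None:
        # No config and no calibration data: default to 8-bit weight-only quantization.
        return OVWeightQuantizationConfig(bits=8)
    # A calibration dataset (or weights_only=False) now implies full static
    # quantization; the real method also emits the warning added above.
    return OVQuantizationConfig()
```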
