Update OV quantization docs and QA notebook according to the recent changes #671

Merged · 4 commits · Apr 19, 2024
README.md · 5 changes: 3 additions & 2 deletions
@@ -122,7 +122,7 @@ Post-training static quantization introduces an additional calibration step where ...

```python
from functools import partial
-from optimum.intel import OVQuantizer, OVModelForSequenceClassification
+from optimum.intel import OVQuantizer, OVModelForSequenceClassification, OVConfig, OVQuantizationConfig
from transformers import AutoTokenizer, AutoModelForSequenceClassification

model_id = "distilbert-base-uncased-finetuned-sst-2-english"
@@ -145,7 +145,8 @@ calibration_dataset = quantizer.get_calibration_dataset(
# The directory where the quantized model will be saved
save_dir = "nncf_results"
# Apply static quantization and save the resulting model in the OpenVINO IR format
-quantizer.quantize(calibration_dataset=calibration_dataset, save_directory=save_dir)
+ov_config = OVConfig(quantization_config=OVQuantizationConfig())
+quantizer.quantize(ov_config=ov_config, calibration_dataset=calibration_dataset, save_directory=save_dir)
# Load the quantized model
optimized_model = OVModelForSequenceClassification.from_pretrained(save_dir)
```
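For context, the `calibration_dataset` referenced in the second hunk is built a few lines above the diff window with `OVQuantizer.get_calibration_dataset`. A minimal sketch of that elided setup, assuming the GLUE/SST-2 data and preprocessing the README uses elsewhere (sample count and sequence length are illustrative):

```python
from functools import partial
from transformers import AutoTokenizer
from optimum.intel import OVQuantizer, OVModelForSequenceClassification

model_id = "distilbert-base-uncased-finetuned-sst-2-english"
model = OVModelForSequenceClassification.from_pretrained(model_id, export=True)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Tokenize raw examples so calibration samples match the model's expected inputs
def preprocess_function(examples, tokenizer):
    return tokenizer(examples["sentence"], padding="max_length", max_length=128, truncation=True)

quantizer = OVQuantizer.from_pretrained(model)
# Draw a few hundred training samples from GLUE/SST-2 as calibration data (assumed settings)
calibration_dataset = quantizer.get_calibration_dataset(
    "glue",
    dataset_config_name="sst2",
    preprocess_function=partial(preprocess_function, tokenizer=tokenizer),
    num_samples=300,
    dataset_split="train",
)
```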
docs/source/optimization_ov.mdx · 5 changes: 3 additions & 2 deletions
@@ -84,7 +84,7 @@ Here is how to apply static quantization on a fine-tuned DistilBERT given your own ...

```python
from transformers import AutoTokenizer
-from optimum.intel import OVQuantizer, OVModelForSequenceClassification,
+from optimum.intel import OVQuantizer, OVModelForSequenceClassification, OVConfig, OVQuantizationConfig

model_id = "distilbert-base-uncased-finetuned-sst-2-english"
model = OVModelForSequenceClassification.from_pretrained(model_id, export=True)
@@ -95,7 +95,8 @@ save_dir = "ptq_model"
quantizer = OVQuantizer.from_pretrained(model)

# Apply static quantization and export the resulting quantized model to OpenVINO IR format
-quantizer.quantize(calibration_dataset=calibration_dataset, save_directory=save_dir)
+ov_config = OVConfig(quantization_config=OVQuantizationConfig())
+quantizer.quantize(ov_config=ov_config, calibration_dataset=calibration_dataset, save_directory=save_dir)
# Save the tokenizer
tokenizer.save_pretrained(save_dir)
```
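Once the new `quantize(ov_config=...)` call has run, the quantized IR loads back through the same model class. A quick smoke test, assuming the `save_dir` from the snippet above (the sample sentence is illustrative):

```python
from transformers import AutoTokenizer, pipeline
from optimum.intel import OVModelForSequenceClassification

save_dir = "ptq_model"
# Load the statically quantized OpenVINO IR model and its tokenizer
model = OVModelForSequenceClassification.from_pretrained(save_dir)
tokenizer = AutoTokenizer.from_pretrained(save_dir)

# Sanity-check that the quantized model still produces sensible predictions
cls_pipe = pipeline("text-classification", model=model, tokenizer=tokenizer)
print(cls_pipe("He's a dreadful magician."))
```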
notebooks/openvino/question_answering_quantization.ipynb · 13 changes: 7 additions & 6 deletions
@@ -51,7 +51,7 @@
"import transformers\n",
"from evaluate import evaluator\n",
"from openvino.runtime import Core\n",
"from optimum.intel.openvino import OVModelForQuestionAnswering, OVQuantizer\n",
"from optimum.intel.openvino import OVModelForQuestionAnswering, OVQuantizer, OVQuantizationConfig, OVConfig\n",
"from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline\n",
"\n",
"transformers.logging.set_verbosity_error()\n",
@@ -286,11 +286,11 @@
"**NOTE:** if you notice very low accuracy after post-training quantization, it is likely caused by an overflow issue which affects processors that do not contain VNNI (Vector Neural Network Instruction). NNCF has an `overflow_fix` option to address this. It will effectively use 7-bits for quantizing instead of 8-bits to prevent the overflow. To use this option, modify the code in the next cell to add an explicit quantization configuration, and set `overflow_fix` to `\"enable\"`:\n",
"\n",
"```\n",
"from optimum.intel.openvino import OVConfig\n",
"from optimum.intel.openvino import OVConfig, OVQuantizationConfig\n",
"\n",
"ov_config = OVConfig()\n",
"ov_config.compression[\"overflow_fix\"] = \"enable\"\n",
"quantizer = OVQuantizer.from_pretrained(model, ov_config=ov_config)\n",
"ov_config = OVConfig(quantization_config=OVQuantizationConfig(overflow_fix=\"enable\")\n",
"quantizer = OVQuantizer.from_pretrained(model)\n",
"quantizer.quantize(calibration_dataset=train_dataset, save_directory=int8_ptq_model_path, ov_config=ov_config)\n",
"```\n",
"\n",
"For more information, see [Lower Numerical Precision Deep Learning Inference and Training](https://www.intel.com/content/www/us/en/developer/articles/technical/lower-numerical-precision-deep-learning-inference-and-training.html)"
@@ -317,7 +317,8 @@
"\n",
"# Quantize the model\n",
"quantizer = OVQuantizer.from_pretrained(model)\n",
"quantizer.quantize(calibration_dataset=train_dataset, save_directory=int8_ptq_model_path)"
"ov_config = OVConfig(quantization_config=OVQuantizationConfig())\n",
"quantizer.quantize(calibration_dataset=train_dataset, ov_config=ov_config, save_directory=int8_ptq_model_path)"
]
},
{
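After this cell runs, the INT8 model can be reloaded and exercised like any other `OVModelForQuestionAnswering`. A minimal check, assuming the notebook's `int8_ptq_model_path` and `tokenizer` from earlier cells (the question/context pair is illustrative):

```python
from transformers import pipeline
from optimum.intel.openvino import OVModelForQuestionAnswering

# Reload the statically quantized model saved by the cell above
int8_model = OVModelForQuestionAnswering.from_pretrained(int8_ptq_model_path)
qa_pipe = pipeline("question-answering", model=int8_model, tokenizer=tokenizer)

result = qa_pipe(
    question="Which instruction set avoids the overflow issue?",
    context="Processors with VNNI (Vector Neural Network Instructions) avoid the overflow issue.",
)
print(result["answer"], result["score"])
```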
optimum/intel/openvino/quantization.py · 7 changes: 6 additions & 1 deletion
@@ -280,13 +280,18 @@ def quantize(
                raise TypeError(f"`ov_config` should be an `OVConfig`, but got: {type(ov_config)} instead.")
            quantization_config = ov_config.quantization_config
            if quantization_config is None:
-               if weights_only is None or weights_only is True:
+               if (weights_only is None or weights_only is True) and calibration_dataset is None:
                    if weights_only is None:
                        logger.info(
                            "`quantization_config` was not provided, 8-bit asymmetric weight quantization will be applied."
                        )
                    ov_config.quantization_config = OVWeightQuantizationConfig(bits=8)
+               else:
+                   logger.warning(
+                       "`quantization_config` was not provided, but calibration dataset was provided, assuming full "
+                       "model quantization is intended. In the future, please provide `quantization_config` as an "
+                       "instance of OVQuantizationConfig."
+                   )
+                   ov_config.quantization_config = OVQuantizationConfig()

        if isinstance(self.model, OVBaseModel):
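From the caller's side, the net effect of this change is that `quantize()` without an explicit `quantization_config` now picks its default from whether a calibration dataset is present. A sketch of the two paths, assuming `model` is a loaded OpenVINO model and `calibration_dataset` was prepared as in the docs above:

```python
from optimum.intel import OVQuantizer

quantizer = OVQuantizer.from_pretrained(model)

# Path 1: no quantization_config and no calibration dataset
# -> falls back to 8-bit asymmetric weight-only quantization (info log)
quantizer.quantize(save_directory="int8_weights_only")

# Path 2: no quantization_config but a calibration dataset is given
# -> assumes full static quantization and emits the new warning
quantizer.quantize(calibration_dataset=calibration_dataset, save_directory="int8_static")
```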