From 74b8dc66d9667eeecd7a2c3eab09d3a09b81b0c4 Mon Sep 17 00:00:00 2001 From: Alexander Date: Fri, 8 Mar 2024 14:50:19 +0400 Subject: [PATCH] Updates weight quantization section in the docs --- docs/source/optimization_ov.mdx | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/docs/source/optimization_ov.mdx b/docs/source/optimization_ov.mdx index 51067b0b64..088b78f0d3 100644 --- a/docs/source/optimization_ov.mdx +++ b/docs/source/optimization_ov.mdx @@ -82,7 +82,17 @@ from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig model = OVModelForCausalLM.from_pretrained( model_id, - export=True, + quantization_config=OVWeightQuantizationConfig(bits=4), +) +``` + +You can tune quantization parameters to achieve a better performance-accuracy trade-off as follows: + +```python +from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig + +model = OVModelForCausalLM.from_pretrained( + model_id, quantization_config=OVWeightQuantizationConfig(bits=4, sym=False, ratio=0.8, dataset="ptb"), ) ```