Commit 1138ff9

Add doc and test

1 parent 89b3487 commit 1138ff9

4 files changed: +47 -18 lines

README.md (+7 -1)
@@ -78,12 +78,18 @@ It is possible to export your model to the [OpenVINO IR](https://docs.openvino.a
 optimum-cli export openvino --model gpt2 ov_model
 ```

-You can also apply 8-bit weight-only quantization when exporting your model: the model linear and embedding weights will be quantized to INT8, while the activations will be kept in floating-point precision.
+You can also apply 8-bit weight-only quantization when exporting your model: the model linear, embedding and convolution weights will be quantized to INT8, while the activations will be kept in floating-point precision.

 ```plain
 optimum-cli export openvino --model gpt2 --weight-format int8 ov_model
 ```

+Quantization in hybrid mode can be applied to a Stable Diffusion pipeline during model export. This applies hybrid post-training quantization to the UNet model and weight-only quantization to the rest of the pipeline components: in hybrid mode, the weights of MatMul and Embedding layers are quantized, as well as the activations of other layers.
+
+```plain
+optimum-cli export openvino --model stabilityai/stable-diffusion-2-1 --dataset conceptual_captions --weight-format int8 ov_model
+```
+
 To apply quantization on both weights and activations, you can find more information in the [documentation](https://huggingface.co/docs/optimum/main/en/intel/optimization_ov).

 #### Inference:
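
As a companion to the CLI flow documented above, here is a minimal sketch of the same hybrid export through the Python API. It is not part of this commit and assumes `OVWeightQuantizationConfig` accepts a `dataset` argument mirroring the CLI's `--dataset` flag; consult the linked documentation for the authoritative interface.

```python
# Sketch only: assumes the quantization_config keyword of from_pretrained
# mirrors the CLI's --weight-format int8 / --dataset options.
from optimum.intel import OVStableDiffusionPipeline, OVWeightQuantizationConfig

quantization_config = OVWeightQuantizationConfig(bits=8, dataset="conceptual_captions")
pipeline = OVStableDiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-2-1",
    export=True,  # convert the PyTorch checkpoint to OpenVINO IR on the fly
    quantization_config=quantization_config,  # triggers hybrid quantization of the UNet
)
pipeline.save_pretrained("ov_model")
```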

optimum/exporters/openvino/__main__.py (+18 -17)
@@ -247,27 +247,12 @@ class StoreAttr(object):

     GPTQQuantizer.post_init_model = post_init_model

-    model = TasksManager.get_model_from_task(
-        task,
-        model_name_or_path,
-        subfolder=subfolder,
-        revision=revision,
-        cache_dir=cache_dir,
-        use_auth_token=use_auth_token,
-        local_files_only=local_files_only,
-        force_download=force_download,
-        trust_remote_code=trust_remote_code,
-        framework=framework,
-        device=device,
-        library_name=library_name,
-        **loading_kwargs,
-    )
-
+    # Apply quantization in hybrid mode to Stable Diffusion before export
     if (
         library_name == "diffusers"
         and ov_config
         and ov_config.quantization_config
-        and "dataset" in ov_config.quantization_config
+        and ov_config.quantization_config.get("dataset", None)
     ):
         import huggingface_hub

@@ -301,6 +286,22 @@ class StoreAttr(object):
         model.save_pretrained(output)
         return

+    model = TasksManager.get_model_from_task(
+        task,
+        model_name_or_path,
+        subfolder=subfolder,
+        revision=revision,
+        cache_dir=cache_dir,
+        use_auth_token=use_auth_token,
+        local_files_only=local_files_only,
+        force_download=force_download,
+        trust_remote_code=trust_remote_code,
+        framework=framework,
+        device=device,
+        library_name=library_name,
+        **loading_kwargs,
+    )
+
     needs_pad_token_id = task == "text-classification" and getattr(model.config, "pad_token_id", None) is None

     if needs_pad_token_id:
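
Two things happen in this diff: the `TasksManager.get_model_from_task` call moves below the hybrid-quantization branch, which loads and saves the pipeline itself and returns early, so loading the model up front would be wasted work; and the condition switches from a membership test to `.get("dataset", None)`, which is falsy when the key is present but empty. A toy illustration of the second point, not taken from the commit:

```python
# Toy illustration: why `.get("dataset", None)` replaced `"dataset" in ...`.
quantization_config = {"bits": 8, "dataset": None}

print("dataset" in quantization_config)                # True: the old check would fire
print(bool(quantization_config.get("dataset", None)))  # False: the new check skips hybrid mode
```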

optimum/intel/openvino/utils.py (+1 -0)
@@ -96,6 +96,7 @@
     "stable-diffusion": "OVStableDiffusionPipeline",
     "stable-diffusion-xl": "OVStableDiffusionXLPipeline",
     "pix2struct": "OVModelForPix2Struct",
+    "latent-consistency": "OVLatentConsistencyModelPipeline",
 }
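
The new `_HEAD_TO_AUTOMODELS` entry is consumed wherever a task name must be resolved to a pipeline class; the test below does this with `eval`. A hypothetical equivalent using `getattr`, assuming the class is re-exported from `optimum.intel` like its siblings:

```python
# Hypothetical lookup sketch; assumes optimum.intel re-exports the class.
import optimum.intel
from optimum.intel.openvino.utils import _HEAD_TO_AUTOMODELS

cls_name = _HEAD_TO_AUTOMODELS["latent-consistency"]  # "OVLatentConsistencyModelPipeline"
pipeline_cls = getattr(optimum.intel, cls_name)       # avoids eval on the class name
pipeline = pipeline_cls.from_pretrained("ov_model")   # load a previously exported pipeline
```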

tests/openvino/test_exporters_cli.py (+21 -0)
@@ -37,6 +37,7 @@
     OVModelForTokenClassification,
     OVStableDiffusionPipeline,
     OVStableDiffusionXLPipeline,
+    OVLatentConsistencyModelPipeline,
 )
 from optimum.intel.openvino.utils import _HEAD_TO_AUTOMODELS
 from optimum.intel.utils.import_utils import is_openvino_tokenizers_available

@@ -77,6 +78,12 @@ class OVCLIExportTestCase(unittest.TestCase):
         "stable-diffusion-xl": 0,  # not supported
     }

+    SUPPORTED_SD_HYBRID_ARCHITECTURES = (
+        ("stable-diffusion", 72, 195),
+        ("stable-diffusion-xl", 84, 331),
+        ("latent-consistency", 50, 135),
+    )
+
     SUPPORTED_4BIT_ARCHITECTURES = (("text-generation-with-past", "opt125m"),)

     SUPPORTED_4BIT_OPTIONS = ["int4_sym_g128", "int4_asym_g128", "int4_sym_g64", "int4_asym_g64"]

@@ -176,6 +183,20 @@ def test_exporters_cli_int8(self, task: str, model_type: str):
                 _, num_int8, _ = get_num_quantized_nodes(model)
                 self.assertEqual(expected_int8[i], num_int8)

+    @parameterized.expand(SUPPORTED_SD_HYBRID_ARCHITECTURES)
+    def test_exporters_cli_hybrid_quantization(self, model_type: str, exp_num_fq: int, exp_num_int8: int):
+        with TemporaryDirectory() as tmpdir:
+            subprocess.run(
+                f"optimum-cli export openvino --model {MODEL_NAMES[model_type]} "
+                f"--task {model_type} --dataset laion/filtered-wit --weight-format int8 {tmpdir}",
+                shell=True,
+                check=True,
+            )
+            model = eval(_HEAD_TO_AUTOMODELS[model_type]).from_pretrained(tmpdir)
+            num_fq, num_int8, _ = get_num_quantized_nodes(model.unet)
+            self.assertEqual(exp_num_int8, num_int8)
+            self.assertEqual(exp_num_fq, num_fq)
+
     @parameterized.expand(TEST_4BIT_CONFIGURATONS)
     def test_exporters_cli_int4(self, task: str, model_type: str, option: str):
         with TemporaryDirectory() as tmpdir:
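
To exercise only the new parameterized test locally, something like the following should work, assuming pytest is the runner as elsewhere in this test suite:

```plain
pytest tests/openvino/test_exporters_cli.py -k hybrid_quantization
```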
