
Commit 4651ac2

l-bat and echarlaix authored Apr 18, 2024
Export hybrid StableDiffusion models via optimum-cli (#618)
* Export hybrid StableDiffusion models via optimum-cli
* Add doc and test
* Remove huggingface_hub
* remove quantization from main_export
* remove unused function
* Infer task by loading the diffusers config
* Fix style
* fix tests

---------

Co-authored-by: Ella Charlaix <ella@huggingface.co>
1 parent 228a3e0 commit 4651ac2

File tree

6 files changed (+95 -18 lines)


README.md

+7 -1

@@ -78,12 +78,18 @@ It is possible to export your model to the [OpenVINO IR](https://docs.openvino.a
 optimum-cli export openvino --model gpt2 ov_model
 ```
 
-You can also apply 8-bit weight-only quantization when exporting your model : the model linear and embedding weights will be quantized to INT8, the activations will be kept in floating point precision.
+You can also apply 8-bit weight-only quantization when exporting your model : the model linear, embedding and convolution weights will be quantized to INT8, the activations will be kept in floating point precision.
 
 ```plain
 optimum-cli export openvino --model gpt2 --weight-format int8 ov_model
 ```
 
+Quantization in hybrid mode can be applied to Stable Diffusion pipeline during model export. This involves applying hybrid post-training quantization to the UNet model and weight-only quantization for the rest of the pipeline components. In the hybrid mode, weights in MatMul and Embedding layers are quantized, as well as activations of other layers.
+
+```plain
+optimum-cli export openvino --model stabilityai/stable-diffusion-2-1 --dataset conceptual_captions --weight-format int8 ov_model
+```
+
 To apply quantization on both weights and activations, you can find more information in the [documentation](https://huggingface.co/docs/optimum/main/en/intel/optimization_ov).
 
 #### Inference:
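Editor's note: the same hybrid quantization should also be reachable from the Python API rather than the CLI. The snippet below is a minimal sketch of that path and is not part of this diff; it assumes the `OVStableDiffusionPipeline` and `OVWeightQuantizationConfig` classes exported by `optimum.intel` and a `dataset` field on the quantization config, mirroring what the CLI branch added in this commit does.

```python
# Minimal sketch (not from this commit's diff): hybrid quantization via the Python API.
from optimum.intel import OVStableDiffusionPipeline, OVWeightQuantizationConfig

# Passing a dataset together with 8-bit weights triggers hybrid mode for diffusion
# pipelines: weight-only INT8 for most components, plus activation quantization in the UNet.
quantization_config = OVWeightQuantizationConfig(bits=8, dataset="conceptual_captions")

pipeline = OVStableDiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-2-1",
    export=True,
    quantization_config=quantization_config,
)
pipeline.save_pretrained("ov_model")
```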

optimum/commands/export/openvino.py

+65 -15

@@ -19,6 +19,7 @@
 from typing import TYPE_CHECKING, Optional
 
 from ...exporters import TasksManager
+from ...intel.utils.import_utils import DIFFUSERS_IMPORT_ERROR, is_diffusers_available
 from ..base import BaseOptimumCLICommand, CommandInfo
 
 
@@ -104,6 +105,16 @@ def parse_args_openvino(parser: "ArgumentParser"):
         default=None,
         help=("The group size to use for quantization. Recommended value is 128 and -1 uses per-column quantization."),
     )
+    optional_group.add_argument(
+        "--dataset",
+        type=str,
+        default=None,
+        help=(
+            "The dataset used for data-aware compression or quantization with NNCF. "
+            "You can use the one from the list ['wikitext2','c4','c4-new','ptb','ptb-new'] for LLLMs "
+            "or ['conceptual_captions','laion/220k-GPT4Vision-captions-from-LIVIS','laion/filtered-wit'] for diffusion models."
+        ),
+    )
     optional_group.add_argument(
         "--disable-stateful",
         action="store_true",
@@ -195,20 +206,59 @@ def run(self):
             )
             quantization_config["sym"] = "asym" not in self.args.weight_format
             quantization_config["group_size"] = 128 if "128" in self.args.weight_format else 64
+            quantization_config["dataset"] = self.args.dataset
         ov_config = OVConfig(quantization_config=quantization_config)
 
-        # TODO : add input shapes
-        main_export(
-            model_name_or_path=self.args.model,
-            output=self.args.output,
-            task=self.args.task,
-            framework=self.args.framework,
-            cache_dir=self.args.cache_dir,
-            trust_remote_code=self.args.trust_remote_code,
-            pad_token_id=self.args.pad_token_id,
-            ov_config=ov_config,
-            stateful=not self.args.disable_stateful,
-            convert_tokenizer=self.args.convert_tokenizer,
-            library_name=self.args.library
-            # **input_shapes,
-        )
+        library_name = TasksManager.infer_library_from_model(self.args.model)
+
+        if (
+            library_name == "diffusers"
+            and ov_config
+            and ov_config.quantization_config
+            and ov_config.quantization_config.dataset is not None
+        ):
+            if not is_diffusers_available():
+                raise ValueError(DIFFUSERS_IMPORT_ERROR.format("Export of diffusers models"))
+
+            from diffusers import DiffusionPipeline
+
+            diffusers_config = DiffusionPipeline.load_config(self.args.model)
+            class_name = diffusers_config.get("_class_name", None)
+
+            if class_name == "LatentConsistencyModelPipeline":
+                from optimum.intel import OVLatentConsistencyModelPipeline
+
+                model_cls = OVLatentConsistencyModelPipeline
+
+            elif class_name == "StableDiffusionXLPipeline":
+                from optimum.intel import OVStableDiffusionXLPipeline
+
+                model_cls = OVStableDiffusionXLPipeline
+            elif class_name == "StableDiffusionPipeline":
+                from optimum.intel import OVStableDiffusionPipeline
+
+                model_cls = OVStableDiffusionPipeline
+            else:
+                raise NotImplementedError(f"Quantization in hybrid mode isn't supported for class {class_name}.")
+
+            model = model_cls.from_pretrained(
+                self.args.model, export=True, quantization_config=ov_config.quantization_config
+            )
+            model.save_pretrained(self.args.output)
+
+        else:
+            # TODO : add input shapes
+            main_export(
+                model_name_or_path=self.args.model,
+                output=self.args.output,
+                task=self.args.task,
+                framework=self.args.framework,
+                cache_dir=self.args.cache_dir,
+                trust_remote_code=self.args.trust_remote_code,
+                pad_token_id=self.args.pad_token_id,
+                ov_config=ov_config,
+                stateful=not self.args.disable_stateful,
+                convert_tokenizer=self.args.convert_tokenizer,
+                library_name=library_name,
+                # **input_shapes,
+            )
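A note on the class dispatch above: `DiffusionPipeline.load_config` only reads the pipeline's `model_index.json`, so `_class_name` can be inspected without instantiating the model or downloading its weights. A small illustrative sketch (the model ID is just an example, not taken from the diff):

```python
from diffusers import DiffusionPipeline

# Reads model_index.json from the Hub (or a local path) without loading any weights.
config = DiffusionPipeline.load_config("stabilityai/stable-diffusion-2-1")
print(config.get("_class_name"))  # e.g. "StableDiffusionPipeline"
```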

optimum/exporters/openvino/__main__.py

+1 -1

@@ -77,7 +77,7 @@ def main_export(
         model_name_or_path (`str`):
             Model ID on huggingface.co or path on disk to the model repository to export.
         output (`Union[str, Path]`):
-            Path indicating the directory where to store the generated ONNX model.
+            Path indicating the directory where to store the generated OpenVINO model.
 
         > Optional parameters
 

optimum/intel/openvino/modeling_diffusion.py

+1 -1

@@ -387,7 +387,7 @@ def transform_fn(data_item):
                 self.__call__(**inputs, height=height, width=width)
             else:
                 self.__call__(*inputs, height=height, width=width)
-            if len(calibration_data) > num_samples:
+            if len(calibration_data) >= num_samples:
                 break
 
         self.unet.request = self.unet.request.request
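The one-character change above tightens the calibration loop: with `>`, collection continued until strictly more than `num_samples` items had been gathered, while `>=` stops as soon as the target count is reached. A standalone sketch of the same stopping condition (the names here are illustrative, not taken from the file):

```python
# Illustrative only: gather at most `num_samples` items from a data source.
def collect_calibration_data(source, num_samples):
    calibration_data = []
    for item in source:
        calibration_data.append(item)
        if len(calibration_data) >= num_samples:  # stop once the target count is reached
            break
    return calibration_data

assert len(collect_calibration_data(range(1000), num_samples=32)) == 32
```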

optimum/intel/openvino/utils.py

+1

@@ -96,6 +96,7 @@
     "stable-diffusion": "OVStableDiffusionPipeline",
     "stable-diffusion-xl": "OVStableDiffusionXLPipeline",
     "pix2struct": "OVModelForPix2Struct",
+    "latent-consistency": "OVLatentConsistencyModelPipeline",
 }
 
 
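This mapping is consumed by the CLI tests below via `eval(_HEAD_TO_AUTOMODELS[model_type])`. An `eval`-free lookup along the same lines might look like the following sketch (illustrative only, not part of the commit):

```python
# Illustrative only: resolve an exported optimum-intel class from its name
# using attribute lookup on the package instead of eval().
import optimum.intel

_HEAD_TO_AUTOMODELS = {
    "stable-diffusion": "OVStableDiffusionPipeline",
    "latent-consistency": "OVLatentConsistencyModelPipeline",
}

def resolve_class(model_type: str):
    return getattr(optimum.intel, _HEAD_TO_AUTOMODELS[model_type])
```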

tests/openvino/test_exporters_cli.py

+20

@@ -26,6 +26,7 @@
 
 from optimum.exporters.openvino.__main__ import main_export
 from optimum.intel import ( # noqa
+    OVLatentConsistencyModelPipeline,
     OVModelForAudioClassification,
     OVModelForCausalLM,
     OVModelForFeatureExtraction,
@@ -77,6 +78,12 @@ class OVCLIExportTestCase(unittest.TestCase):
         "stable-diffusion-xl": 0, # not supported
     }
 
+    SUPPORTED_SD_HYBRID_ARCHITECTURES = (
+        ("stable-diffusion", 72, 195),
+        ("stable-diffusion-xl", 84, 331),
+        ("latent-consistency", 50, 135),
+    )
+
     SUPPORTED_4BIT_ARCHITECTURES = (("text-generation-with-past", "opt125m"),)
 
     SUPPORTED_4BIT_OPTIONS = ["int4_sym_g128", "int4_asym_g128", "int4_sym_g64", "int4_asym_g64"]
@@ -176,6 +183,19 @@ def test_exporters_cli_int8(self, task: str, model_type: str):
             _, num_int8, _ = get_num_quantized_nodes(model)
             self.assertEqual(expected_int8[i], num_int8)
 
+    @parameterized.expand(SUPPORTED_SD_HYBRID_ARCHITECTURES)
+    def test_exporters_cli_hybrid_quantization(self, model_type: str, exp_num_fq: int, exp_num_int8: int):
+        with TemporaryDirectory() as tmpdir:
+            subprocess.run(
+                f"optimum-cli export openvino --model {MODEL_NAMES[model_type]} --dataset laion/filtered-wit --weight-format int8 {tmpdir}",
+                shell=True,
+                check=True,
+            )
+            model = eval(_HEAD_TO_AUTOMODELS[model_type]).from_pretrained(tmpdir)
+            num_fq, num_int8, _ = get_num_quantized_nodes(model.unet)
+            self.assertEqual(exp_num_int8, num_int8)
+            self.assertEqual(exp_num_fq, num_fq)
+
     @parameterized.expand(TEST_4BIT_CONFIGURATONS)
     def test_exporters_cli_int4(self, task: str, model_type: str, option: str):
         with TemporaryDirectory() as tmpdir: