From 315ff191afed70cb65b62562887740fa3017cc99 Mon Sep 17 00:00:00 2001
From: Liubov Talamanova
Date: Wed, 20 Mar 2024 11:39:28 +0000
Subject: [PATCH 1/8] Export hybrid StableDiffusion models via optimum-cli

---
 optimum/commands/export/openvino.py          | 11 ++++++
 optimum/exporters/openvino/__main__.py       | 38 ++++++++++++++++++++
 optimum/intel/openvino/modeling_diffusion.py |  2 +-
 3 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/optimum/commands/export/openvino.py b/optimum/commands/export/openvino.py
index 6c17a333ef..4c217c56c4 100644
--- a/optimum/commands/export/openvino.py
+++ b/optimum/commands/export/openvino.py
@@ -104,6 +104,16 @@ def parse_args_openvino(parser: "ArgumentParser"):
         default=None,
         help=("The group size to use for quantization. Recommended value is 128 and -1 uses per-column quantization."),
     )
+    optional_group.add_argument(
+        "--dataset",
+        type=str,
+        default=None,
+        help=(
+            "The dataset used for data-aware compression or quantization with NNCF. "
+            "You can use one from the list ['wikitext2','c4','c4-new','ptb','ptb-new'] for LLMs "
+            "or ['conceptual_captions','laion/220k-GPT4Vision-captions-from-LIVIS','laion/filtered-wit'] for diffusion models."
+        ),
+    )
     optional_group.add_argument(
         "--disable-stateful",
         action="store_true",
@@ -195,6 +205,7 @@ def run(self):
             )
             quantization_config["sym"] = "asym" not in self.args.weight_format
             quantization_config["group_size"] = 128 if "128" in self.args.weight_format else 64
+            quantization_config["dataset"] = self.args.dataset
         ov_config = OVConfig(quantization_config=quantization_config)
 
         # TODO : add input shapes
diff --git a/optimum/exporters/openvino/__main__.py b/optimum/exporters/openvino/__main__.py
index 8b8cc09fc1..053994c077 100644
--- a/optimum/exporters/openvino/__main__.py
+++ b/optimum/exporters/openvino/__main__.py
@@ -284,6 +284,44 @@ class StoreAttr(object):
         **loading_kwargs,
     )
 
+    if (
+        library_name == "diffusers"
+        and ov_config
+        and ov_config.quantization_config
+        and "dataset" in ov_config.quantization_config
+    ):
+        import huggingface_hub
+
+        model_info = huggingface_hub.model_info(model_name_or_path, revision=revision)
+        class_name = model_info.config["diffusers"]["_class_name"]
+        if class_name == "LatentConsistencyModelPipeline":
+            from optimum.intel import OVLatentConsistencyModelPipeline
+
+            model_cls = OVLatentConsistencyModelPipeline
+        elif class_name == "StableDiffusionXLPipeline":
+            from optimum.intel import OVStableDiffusionXLPipeline
+
+            model_cls = OVStableDiffusionXLPipeline
+        elif class_name == "StableDiffusionPipeline":
+            from optimum.intel import OVStableDiffusionPipeline
+
+            model_cls = OVStableDiffusionPipeline
+        else:
+            raise NotImplementedError(f"{class_name} doesn't support quantization in hybrid mode.")
+
+        model = model_cls.from_pretrained(
+            model_id=model_name_or_path,
+            export=True,
+            quantization_config=ov_config.quantization_config,
+            cache_dir=cache_dir,
+            trust_remote_code=trust_remote_code,
+            revision=revision,
+            force_download=force_download,
+            use_auth_token=use_auth_token,
+        )
+        model.save_pretrained(output)
+        return
+
     needs_pad_token_id = task == "text-classification" and getattr(model.config, "pad_token_id", None) is None
 
     if needs_pad_token_id:
diff --git a/optimum/intel/openvino/modeling_diffusion.py b/optimum/intel/openvino/modeling_diffusion.py
index 1e562749b2..7bc7cca04c 100644
--- a/optimum/intel/openvino/modeling_diffusion.py
+++ b/optimum/intel/openvino/modeling_diffusion.py
@@ -387,7 +387,7 @@ def transform_fn(data_item):
                     self.__call__(**inputs, height=height, width=width)
                 else:
                     self.__call__(*inputs, height=height, width=width)
-                if len(calibration_data) > num_samples:
+                if len(calibration_data) >= num_samples:
                     break
 
         self.unet.request = self.unet.request.request

From c33d62ac86400b738bd9c3cb9f8faf7f69a7cd89 Mon Sep 17 00:00:00 2001
From: Liubov Talamanova
Date: Wed, 20 Mar 2024 18:49:21 +0000
Subject: [PATCH 2/8] Add doc and test

---
 README.md                              |  8 +++++-
 optimum/exporters/openvino/__main__.py | 35 +++++++++++++------------
 optimum/intel/openvino/utils.py        |  1 +
 tests/openvino/test_exporters_cli.py   | 21 ++++++++++++++++
 4 files changed, 47 insertions(+), 18 deletions(-)

diff --git a/README.md b/README.md
index 78ca130145..41537d8971 100644
--- a/README.md
+++ b/README.md
@@ -78,12 +78,18 @@ It is possible to export your model to the [OpenVINO IR](https://docs.openvino.a
 optimum-cli export openvino --model gpt2 ov_model
 ```
 
-You can also apply 8-bit weight-only quantization when exporting your model : the model linear and embedding weights will be quantized to INT8, the activations will be kept in floating point precision.
+You can also apply 8-bit weight-only quantization when exporting your model: the model linear, embedding and convolution weights will be quantized to INT8, while the activations will be kept in floating point precision.
 
 ```plain
 optimum-cli export openvino --model gpt2 --weight-format int8 ov_model
 ```
 
+Quantization in hybrid mode can be applied to a Stable Diffusion pipeline during model export. This applies hybrid post-training quantization to the UNet model and weight-only quantization to the rest of the pipeline components. In hybrid mode, weights in MatMul and Embedding layers are quantized, as are the activations of the other layers.
+
+```plain
+optimum-cli export openvino --model stabilityai/stable-diffusion-2-1 --dataset conceptual_captions --weight-format int8 ov_model
+```
+
 To apply quantization on both weights and activations, you can find more information in the [documentation](https://huggingface.co/docs/optimum/main/en/intel/optimization_ov).
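+
+The same hybrid quantization is reachable from the Python API. As a minimal sketch (the pipeline class mirrors the CLI example above, and `OVWeightQuantizationConfig` carrying the calibration dataset is assumed to be the config object the export code consumes):
+
+```python
+from optimum.intel import OVStableDiffusionPipeline, OVWeightQuantizationConfig
+
+# An 8-bit weight config that also carries a dataset selects hybrid quantization of the UNet
+quantization_config = OVWeightQuantizationConfig(bits=8, dataset="conceptual_captions")
+pipeline = OVStableDiffusionPipeline.from_pretrained(
+    "stabilityai/stable-diffusion-2-1", export=True, quantization_config=quantization_config
+)
+pipeline.save_pretrained("ov_model")
+```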
 
 #### Inference:
diff --git a/optimum/exporters/openvino/__main__.py b/optimum/exporters/openvino/__main__.py
index 053994c077..7a0afcfba1 100644
--- a/optimum/exporters/openvino/__main__.py
+++ b/optimum/exporters/openvino/__main__.py
@@ -268,27 +268,12 @@ class StoreAttr(object):
 
         GPTQQuantizer.post_init_model = post_init_model
 
-    model = TasksManager.get_model_from_task(
-        task,
-        model_name_or_path,
-        subfolder=subfolder,
-        revision=revision,
-        cache_dir=cache_dir,
-        use_auth_token=use_auth_token,
-        local_files_only=local_files_only,
-        force_download=force_download,
-        trust_remote_code=trust_remote_code,
-        framework=framework,
-        device=device,
-        library_name=library_name,
-        **loading_kwargs,
-    )
-
+    # Apply quantization in hybrid mode to Stable Diffusion before export
     if (
         library_name == "diffusers"
         and ov_config
         and ov_config.quantization_config
-        and "dataset" in ov_config.quantization_config
+        and ov_config.quantization_config.get("dataset", None)
     ):
         import huggingface_hub
 
@@ -322,6 +307,22 @@ class StoreAttr(object):
         model.save_pretrained(output)
         return
 
+    model = TasksManager.get_model_from_task(
+        task,
+        model_name_or_path,
+        subfolder=subfolder,
+        revision=revision,
+        cache_dir=cache_dir,
+        use_auth_token=use_auth_token,
+        local_files_only=local_files_only,
+        force_download=force_download,
+        trust_remote_code=trust_remote_code,
+        framework=framework,
+        device=device,
+        library_name=library_name,
+        **loading_kwargs,
+    )
+
     needs_pad_token_id = task == "text-classification" and getattr(model.config, "pad_token_id", None) is None
 
     if needs_pad_token_id:
diff --git a/optimum/intel/openvino/utils.py b/optimum/intel/openvino/utils.py
index a0439d2129..4d1479f733 100644
--- a/optimum/intel/openvino/utils.py
+++ b/optimum/intel/openvino/utils.py
@@ -96,6 +96,7 @@
     "stable-diffusion": "OVStableDiffusionPipeline",
     "stable-diffusion-xl": "OVStableDiffusionXLPipeline",
     "pix2struct": "OVModelForPix2Struct",
+    "latent-consistency": "OVLatentConsistencyModelPipeline",
 }
 
 
diff --git a/tests/openvino/test_exporters_cli.py b/tests/openvino/test_exporters_cli.py
index 46c6e3c69a..cafad1fb93 100644
--- a/tests/openvino/test_exporters_cli.py
+++ b/tests/openvino/test_exporters_cli.py
@@ -37,6 +37,7 @@
     OVModelForTokenClassification,
     OVStableDiffusionPipeline,
     OVStableDiffusionXLPipeline,
+    OVLatentConsistencyModelPipeline,
 )
 from optimum.intel.openvino.utils import _HEAD_TO_AUTOMODELS
 from optimum.intel.utils.import_utils import is_openvino_tokenizers_available
@@ -77,6 +78,12 @@ class OVCLIExportTestCase(unittest.TestCase):
         "stable-diffusion-xl": 0,  # not supported
     }
 
+    SUPPORTED_SD_HYBRID_ARCHITECTURES = (
+        ("stable-diffusion", 72, 195),
+        ("stable-diffusion-xl", 84, 331),
+        ("latent-consistency", 50, 135),
+    )
+
     SUPPORTED_4BIT_ARCHITECTURES = (("text-generation-with-past", "opt125m"),)
 
     SUPPORTED_4BIT_OPTIONS = ["int4_sym_g128", "int4_asym_g128", "int4_sym_g64", "int4_asym_g64"]
@@ -176,6 +183,20 @@ def test_exporters_cli_int8(self, task: str, model_type: str):
             _, num_int8, _ = get_num_quantized_nodes(model)
             self.assertEqual(expected_int8[i], num_int8)
 
+    @parameterized.expand(SUPPORTED_SD_HYBRID_ARCHITECTURES)
+    def test_exporters_cli_hybrid_quantization(self, model_type: str, exp_num_fq: int, exp_num_int8: int):
+        with TemporaryDirectory() as tmpdir:
+            subprocess.run(
+                f"optimum-cli export openvino --model {MODEL_NAMES[model_type]} "
+                f"--task {model_type} --dataset laion/filtered-wit --weight-format int8 {tmpdir}",
+                shell=True,
+                check=True,
+            )
+            model = eval(_HEAD_TO_AUTOMODELS[model_type]).from_pretrained(tmpdir)
+            num_fq, num_int8, _ = get_num_quantized_nodes(model.unet)
+            self.assertEqual(exp_num_int8, num_int8)
+            self.assertEqual(exp_num_fq, num_fq)
+
     @parameterized.expand(TEST_4BIT_CONFIGURATONS)
     def test_exporters_cli_int4(self, task: str, model_type: str, option: str):
         with TemporaryDirectory() as tmpdir:

From 768364a1abfad42c7862af5268d14f0c96bf5899 Mon Sep 17 00:00:00 2001
From: Liubov Talamanova
Date: Thu, 21 Mar 2024 13:24:53 +0000
Subject: [PATCH 3/8] Remove huggingface_hub

---
 optimum/exporters/openvino/__main__.py | 43 ++++++++++++--------------
 1 file changed, 20 insertions(+), 23 deletions(-)

diff --git a/optimum/exporters/openvino/__main__.py b/optimum/exporters/openvino/__main__.py
index 7a0afcfba1..c4bd0419aa 100644
--- a/optimum/exporters/openvino/__main__.py
+++ b/optimum/exporters/openvino/__main__.py
@@ -268,6 +268,22 @@ class StoreAttr(object):
 
         GPTQQuantizer.post_init_model = post_init_model
 
+    model = TasksManager.get_model_from_task(
+        task,
+        model_name_or_path,
+        subfolder=subfolder,
+        revision=revision,
+        cache_dir=cache_dir,
+        use_auth_token=use_auth_token,
+        local_files_only=local_files_only,
+        force_download=force_download,
+        trust_remote_code=trust_remote_code,
+        framework=framework,
+        device=device,
+        library_name=library_name,
+        **loading_kwargs,
+    )
+
     # Apply quantization in hybrid mode to Stable Diffusion before export
     if (
         library_name == "diffusers"
@@ -275,19 +291,16 @@ class StoreAttr(object):
         and ov_config.quantization_config
         and ov_config.quantization_config.get("dataset", None)
     ):
-        import huggingface_hub
-
-        model_info = huggingface_hub.model_info(model_name_or_path, revision=revision)
-        class_name = model_info.config["diffusers"]["_class_name"]
-        if class_name == "LatentConsistencyModelPipeline":
+        class_name = model.__class__.__name__
+        if "LatentConsistencyModelPipeline" in class_name:
             from optimum.intel import OVLatentConsistencyModelPipeline
 
             model_cls = OVLatentConsistencyModelPipeline
-        elif class_name == "StableDiffusionXLPipeline":
+        elif "StableDiffusionXLPipeline" in class_name:
             from optimum.intel import OVStableDiffusionXLPipeline
 
             model_cls = OVStableDiffusionXLPipeline
-        elif class_name == "StableDiffusionPipeline":
+        elif "StableDiffusionPipeline" in class_name:
             from optimum.intel import OVStableDiffusionPipeline
 
             model_cls = OVStableDiffusionPipeline
@@ -307,22 +320,6 @@ class StoreAttr(object):
         model.save_pretrained(output)
         return
 
-    model = TasksManager.get_model_from_task(
-        task,
-        model_name_or_path,
-        subfolder=subfolder,
-        revision=revision,
-        cache_dir=cache_dir,
-        use_auth_token=use_auth_token,
-        local_files_only=local_files_only,
-        force_download=force_download,
-        trust_remote_code=trust_remote_code,
-        framework=framework,
-        device=device,
-        library_name=library_name,
-        **loading_kwargs,
-    )
-
     needs_pad_token_id = task == "text-classification" and getattr(model.config, "pad_token_id", None) is None
 
     if needs_pad_token_id:

From 2f2ce9b9aa3df46407999510909ef8842f390774 Mon Sep 17 00:00:00 2001
From: Liubov Talamanova
Date: Thu, 4 Apr 2024 13:01:54 +0100
Subject: [PATCH 4/8] remove quantization from main_export

---
 optimum/commands/export/openvino.py    | 20 ++++++-
 optimum/exporters/openvino/__main__.py | 77 +++++++++++++------------
 2 files changed, 56 insertions(+), 41 deletions(-)

diff --git a/optimum/commands/export/openvino.py b/optimum/commands/export/openvino.py
index 4c217c56c4..819162e702 100644
--- a/optimum/commands/export/openvino.py
+++ b/optimum/commands/export/openvino.py
@@ -161,7 +161,7 @@ def parse_args(parser: "ArgumentParser"):
         return parse_args_openvino(parser)
 
     def run(self):
-        from ...exporters.openvino.__main__ import main_export
+        from ...exporters.openvino.__main__ import main_export, get_relevant_task, export_optimized_diffusion_model
         from ...intel.openvino.configuration import _DEFAULT_4BIT_CONFIGS, OVConfig
 
         if self.args.fp16:
@@ -208,11 +208,22 @@ def run(self):
             quantization_config["dataset"] = self.args.dataset
         ov_config = OVConfig(quantization_config=quantization_config)
 
+        library_name = TasksManager.infer_library_from_model(self.args.model)
+        task = get_relevant_task(self.args.task, self.args.model)
+        saved_dir = self.args.output
+
+        if library_name == "diffusers" and ov_config and ov_config.quantization_config.get("dataset"):
+            import tempfile
+            from copy import deepcopy
+            saved_dir = tempfile.mkdtemp()
+            quantization_config = deepcopy(ov_config.quantization_config)
+            ov_config.quantization_config = {}
+
         # TODO : add input shapes
         main_export(
             model_name_or_path=self.args.model,
-            output=self.args.output,
-            task=self.args.task,
+            output=saved_dir,
+            task=task,
             framework=self.args.framework,
             cache_dir=self.args.cache_dir,
             trust_remote_code=self.args.trust_remote_code,
@@ -223,3 +234,6 @@ def run(self):
             library_name=self.args.library
             # **input_shapes,
         )
+
+        if saved_dir != self.args.output:
+            export_optimized_diffusion_model(saved_dir, self.args.output, task, quantization_config)
diff --git a/optimum/exporters/openvino/__main__.py b/optimum/exporters/openvino/__main__.py
index c4bd0419aa..c56fbd3688 100644
--- a/optimum/exporters/openvino/__main__.py
+++ b/optimum/exporters/openvino/__main__.py
@@ -77,7 +77,7 @@ def main_export(
         model_name_or_path (`str`):
             Model ID on huggingface.co or path on disk to the model repository to export.
         output (`Union[str, Path]`):
-            Path indicating the directory where to store the generated ONNX model.
+            Path indicating the directory where to store the generated OpenVINO model.
 
         > Optional parameters
 
@@ -161,7 +161,7 @@ def main_export(
         ov_config = OVConfig(quantization_config=q_config)
 
     original_task = task
-    task = TasksManager.map_from_synonym(task)
+    task = get_relevant_task(task, model_name_or_path)
     framework = TasksManager.determine_framework(model_name_or_path, subfolder=subfolder, framework=framework)
     library_name_is_not_provided = library_name is None
     library_name = TasksManager.infer_library_from_model(
@@ -284,42 +284,6 @@ class StoreAttr(object):
         **loading_kwargs,
     )
 
-    # Apply quantization in hybrid mode to Stable Diffusion before export
-    if (
-        library_name == "diffusers"
-        and ov_config
-        and ov_config.quantization_config
-        and ov_config.quantization_config.get("dataset", None)
-    ):
-        class_name = model.__class__.__name__
-        if "LatentConsistencyModelPipeline" in class_name:
-            from optimum.intel import OVLatentConsistencyModelPipeline
-
-            model_cls = OVLatentConsistencyModelPipeline
-        elif "StableDiffusionXLPipeline" in class_name:
-            from optimum.intel import OVStableDiffusionXLPipeline
-
-            model_cls = OVStableDiffusionXLPipeline
-        elif "StableDiffusionPipeline" in class_name:
-            from optimum.intel import OVStableDiffusionPipeline
-
-            model_cls = OVStableDiffusionPipeline
-        else:
-            raise NotImplementedError(f"{class_name} doesn't support quantization in hybrid mode.")
-
-        model = model_cls.from_pretrained(
-            model_id=model_name_or_path,
-            export=True,
-            quantization_config=ov_config.quantization_config,
-            cache_dir=cache_dir,
-            trust_remote_code=trust_remote_code,
-            revision=revision,
-            force_download=force_download,
-            use_auth_token=use_auth_token,
-        )
-        model.save_pretrained(output)
-        return
-
     needs_pad_token_id = task == "text-classification" and getattr(model.config, "pad_token_id", None) is None
 
     if needs_pad_token_id:
@@ -412,3 +376,40 @@ class StoreAttr(object):
     if do_gptq_patching:
         torch.cuda.is_available = orig_cuda_check
         GPTQQuantizer.post_init_model = orig_post_init_model
+
+
+def get_relevant_task(task, model_name_or_path):
+    relevant_task = TasksManager.map_from_synonym(task)
+    if relevant_task == "auto":
+        try:
+            relevant_task = TasksManager.infer_task_from_model(model_name_or_path)
+        except KeyError as e:
+            raise KeyError(
+                f"The task could not be automatically inferred. Please provide the argument --task with the relevant task from {', '.join(TasksManager.get_all_tasks())}. Detailed error: {e}"
+            )
+        except RequestsConnectionError as e:
+            raise RequestsConnectionError(
+                f"The task could not be automatically inferred as this is available only for models hosted on the Hugging Face Hub. Please provide the argument --task with the relevant task from {', '.join(TasksManager.get_all_tasks())}. Detailed error: {e}"
+            )
+    return relevant_task
+
+
+def export_optimized_diffusion_model(model_name_or_path, output, task, quantization_config):
+    task = get_relevant_task(task, model_name_or_path)
+    if task == "latent-consistency":
+        from optimum.intel import OVLatentConsistencyModelPipeline
+
+        model_cls = OVLatentConsistencyModelPipeline
+    elif task == "stable-diffusion-xl":
+        from optimum.intel import OVStableDiffusionXLPipeline
+
+        model_cls = OVStableDiffusionXLPipeline
+    elif task == "stable-diffusion":
+        from optimum.intel import OVStableDiffusionPipeline
+
+        model_cls = OVStableDiffusionPipeline
+    else:
+        raise NotImplementedError(f"Quantization in hybrid mode isn't supported for {task}.")
+
+    model = model_cls.from_pretrained(model_id=model_name_or_path, quantization_config=quantization_config)
+    model.save_pretrained(output)

From 30c4a96a3dba488a7324268bbcd433f29a52d2f3 Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Wed, 10 Apr 2024 15:38:53 +0200
Subject: [PATCH 5/8] remove unused function

---
 optimum/commands/export/openvino.py    |  2 +-
 optimum/exporters/openvino/__main__.py | 39 +-------------------------
 tests/openvino/test_exporters_cli.py   |  3 +--
 3 files changed, 3 insertions(+), 41 deletions(-)

diff --git a/optimum/commands/export/openvino.py b/optimum/commands/export/openvino.py
index 819162e702..d4042b02e5 100644
--- a/optimum/commands/export/openvino.py
+++ b/optimum/commands/export/openvino.py
@@ -161,7 +161,7 @@ def parse_args(parser: "ArgumentParser"):
         return parse_args_openvino(parser)
 
     def run(self):
-        from ...exporters.openvino.__main__ import main_export, get_relevant_task, export_optimized_diffusion_model
+        from ...exporters.openvino.__main__ import main_export
         from ...intel.openvino.configuration import _DEFAULT_4BIT_CONFIGS, OVConfig
 
         if self.args.fp16:
diff --git a/optimum/exporters/openvino/__main__.py b/optimum/exporters/openvino/__main__.py
index c56fbd3688..5f74c1de8b 100644
--- a/optimum/exporters/openvino/__main__.py
+++ b/optimum/exporters/openvino/__main__.py
@@ -161,7 +161,7 @@ def main_export(
         ov_config = OVConfig(quantization_config=q_config)
 
     original_task = task
-    task = get_relevant_task(task, model_name_or_path)
+    task = TasksManager.map_from_synonym(task)
     framework = TasksManager.determine_framework(model_name_or_path, subfolder=subfolder, framework=framework)
     library_name_is_not_provided = library_name is None
     library_name = TasksManager.infer_library_from_model(
@@ -376,40 +376,3 @@ class StoreAttr(object):
     if do_gptq_patching:
         torch.cuda.is_available = orig_cuda_check
         GPTQQuantizer.post_init_model = orig_post_init_model
-
-
-def get_relevant_task(task, model_name_or_path):
-    relevant_task = TasksManager.map_from_synonym(task)
-    if relevant_task == "auto":
-        try:
-            relevant_task = TasksManager.infer_task_from_model(model_name_or_path)
-        except KeyError as e:
-            raise KeyError(
-                f"The task could not be automatically inferred. Please provide the argument --task with the relevant task from {', '.join(TasksManager.get_all_tasks())}. Detailed error: {e}"
-            )
-        except RequestsConnectionError as e:
-            raise RequestsConnectionError(
-                f"The task could not be automatically inferred as this is available only for models hosted on the Hugging Face Hub. Please provide the argument --task with the relevant task from {', '.join(TasksManager.get_all_tasks())}. Detailed error: {e}"
-            )
-    return relevant_task
-
-
-def export_optimized_diffusion_model(model_name_or_path, output, task, quantization_config):
-    task = get_relevant_task(task, model_name_or_path)
-    if task == "latent-consistency":
-        from optimum.intel import OVLatentConsistencyModelPipeline
-
-        model_cls = OVLatentConsistencyModelPipeline
-    elif task == "stable-diffusion-xl":
-        from optimum.intel import OVStableDiffusionXLPipeline
-
-        model_cls = OVStableDiffusionXLPipeline
-    elif task == "stable-diffusion":
-        from optimum.intel import OVStableDiffusionPipeline
-
-        model_cls = OVStableDiffusionPipeline
-    else:
-        raise NotImplementedError(f"Quantization in hybrid mode isn't supported for {task}.")
-
-    model = model_cls.from_pretrained(model_id=model_name_or_path, quantization_config=quantization_config)
-    model.save_pretrained(output)
diff --git a/tests/openvino/test_exporters_cli.py b/tests/openvino/test_exporters_cli.py
index cafad1fb93..2ca9bdef0f 100644
--- a/tests/openvino/test_exporters_cli.py
+++ b/tests/openvino/test_exporters_cli.py
@@ -187,8 +187,7 @@ def test_exporters_cli_int8(self, task: str, model_type: str):
     def test_exporters_cli_hybrid_quantization(self, model_type: str, exp_num_fq: int, exp_num_int8: int):
         with TemporaryDirectory() as tmpdir:
             subprocess.run(
-                f"optimum-cli export openvino --model {MODEL_NAMES[model_type]} "
-                f"--task {model_type} --dataset laion/filtered-wit --weight-format int8 {tmpdir}",
+                f"optimum-cli export openvino --model {MODEL_NAMES[model_type]} --dataset laion/filtered-wit --weight-format int8 {tmpdir}",
                 shell=True,
                 check=True,
             )

From 13e44b033ff4679070a47048cbc25259e31e3b57 Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Wed, 10 Apr 2024 15:35:29 +0200
Subject: [PATCH 6/8] Infer task by loading the diffusers config

---
 optimum/commands/export/openvino.py | 71 +++++++++++++++++++----------
 1 file changed, 46 insertions(+), 25 deletions(-)

diff --git a/optimum/commands/export/openvino.py b/optimum/commands/export/openvino.py
index d4042b02e5..6ce9e3731e 100644
--- a/optimum/commands/export/openvino.py
+++ b/optimum/commands/export/openvino.py
@@ -20,6 +20,7 @@
 
 from ...exporters import TasksManager
 from ..base import BaseOptimumCLICommand, CommandInfo
+from ...intel.utils.import_utils import DIFFUSERS_IMPORT_ERROR, is_diffusers_available
 
 
 logger = logging.getLogger(__name__)
@@ -209,31 +210,51 @@ def run(self):
             ov_config = OVConfig(quantization_config=quantization_config)
 
         library_name = TasksManager.infer_library_from_model(self.args.model)
-        task = get_relevant_task(self.args.task, self.args.model)
-        saved_dir = self.args.output
 
         if library_name == "diffusers" and ov_config and ov_config.quantization_config.get("dataset"):
-            import tempfile
-            from copy import deepcopy
-            saved_dir = tempfile.mkdtemp()
-            quantization_config = deepcopy(ov_config.quantization_config)
-            ov_config.quantization_config = {}
-
-        # TODO : add input shapes
-        main_export(
-            model_name_or_path=self.args.model,
-            output=saved_dir,
-            task=task,
-            framework=self.args.framework,
-            cache_dir=self.args.cache_dir,
-            trust_remote_code=self.args.trust_remote_code,
-            pad_token_id=self.args.pad_token_id,
-            ov_config=ov_config,
-            stateful=not self.args.disable_stateful,
-            convert_tokenizer=self.args.convert_tokenizer,
-            library_name=self.args.library
-            # **input_shapes,
-        )
-
-        if saved_dir != self.args.output:
-            export_optimized_diffusion_model(saved_dir, self.args.output, task, quantization_config)
+
+            if not is_diffusers_available():
+                raise ValueError(DIFFUSERS_IMPORT_ERROR.format("Export of diffusers models"))
+
+            from diffusers import DiffusionPipeline
+
+            diffusers_config = DiffusionPipeline.load_config(self.args.model)
+            class_name = diffusers_config.get("_class_name", None)
+
+            if class_name == "LatentConsistencyModelPipeline":
+
+                from optimum.intel import OVLatentConsistencyModelPipeline
+
+                model_cls = OVLatentConsistencyModelPipeline
+
+            elif class_name == "StableDiffusionXLPipeline":
+
+                from optimum.intel import OVStableDiffusionXLPipeline
+
+                model_cls = OVStableDiffusionXLPipeline
+            elif class_name == "StableDiffusionPipeline":
+                from optimum.intel import OVStableDiffusionPipeline
+
+                model_cls = OVStableDiffusionPipeline
+            else:
+                raise NotImplementedError(f"Quantization in hybrid mode isn't supported for class {class_name}.")
+
+            model = model_cls.from_pretrained(self.args.model, export=True, quantization_config=ov_config.quantization_config)
+            model.save_pretrained(self.args.output)
+
+        else:
+            # TODO : add input shapes
+            main_export(
+                model_name_or_path=self.args.model,
+                output=self.args.output,
+                task=self.args.task,
+                framework=self.args.framework,
+                cache_dir=self.args.cache_dir,
+                trust_remote_code=self.args.trust_remote_code,
+                pad_token_id=self.args.pad_token_id,
+                ov_config=ov_config,
+                stateful=not self.args.disable_stateful,
+                convert_tokenizer=self.args.convert_tokenizer,
+                library_name=library_name,
+                # **input_shapes,
+            )

From 6f283282bae849d44fa2e7648ad2d165058c432a Mon Sep 17 00:00:00 2001
From: Liubov Talamanova
Date: Wed, 17 Apr 2024 12:13:00 +0100
Subject: [PATCH 7/8] Fix style

---
 optimum/commands/export/openvino.py  | 9 ++++-----
 tests/openvino/test_exporters_cli.py | 2 +-
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/optimum/commands/export/openvino.py b/optimum/commands/export/openvino.py
index 6ce9e3731e..ccbc589413 100644
--- a/optimum/commands/export/openvino.py
+++ b/optimum/commands/export/openvino.py
@@ -19,8 +19,8 @@
 from typing import TYPE_CHECKING, Optional
 
 from ...exporters import TasksManager
-from ..base import BaseOptimumCLICommand, CommandInfo
-from ...intel.utils.import_utils import DIFFUSERS_IMPORT_ERROR, is_diffusers_available
+from ...intel.utils.import_utils import DIFFUSERS_IMPORT_ERROR, is_diffusers_available
+from ..base import BaseOptimumCLICommand, CommandInfo
 
 
 logger = logging.getLogger(__name__)
@@ -212,7 +212,6 @@ def run(self):
         library_name = TasksManager.infer_library_from_model(self.args.model)
 
         if library_name == "diffusers" and ov_config and ov_config.quantization_config.get("dataset"):
-
             if not is_diffusers_available():
                 raise ValueError(DIFFUSERS_IMPORT_ERROR.format("Export of diffusers models"))
 
@@ -222,13 +221,11 @@ def run(self):
             class_name = diffusers_config.get("_class_name", None)
 
             if class_name == "LatentConsistencyModelPipeline":
-
                 from optimum.intel import OVLatentConsistencyModelPipeline
 
                 model_cls = OVLatentConsistencyModelPipeline
 
             elif class_name == "StableDiffusionXLPipeline":
-
                 from optimum.intel import OVStableDiffusionXLPipeline
 
                 model_cls = OVStableDiffusionXLPipeline
@@ -239,7 +236,9 @@ def run(self):
             else:
                 raise NotImplementedError(f"Quantization in hybrid mode isn't supported for class {class_name}.")
 
-            model = model_cls.from_pretrained(self.args.model, export=True, quantization_config=ov_config.quantization_config)
+            model = model_cls.from_pretrained(
+                self.args.model, export=True, quantization_config=ov_config.quantization_config
+            )
             model.save_pretrained(self.args.output)
 
         else:
diff --git a/tests/openvino/test_exporters_cli.py b/tests/openvino/test_exporters_cli.py
index 2ca9bdef0f..7d618c530e 100644
--- a/tests/openvino/test_exporters_cli.py
+++ b/tests/openvino/test_exporters_cli.py
@@ -26,6 +26,7 @@
 
 from optimum.exporters.openvino.__main__ import main_export
 from optimum.intel import (  # noqa
+    OVLatentConsistencyModelPipeline,
     OVModelForAudioClassification,
     OVModelForCausalLM,
     OVModelForFeatureExtraction,
@@ -37,7 +38,6 @@
     OVModelForTokenClassification,
     OVStableDiffusionPipeline,
     OVStableDiffusionXLPipeline,
-    OVLatentConsistencyModelPipeline,
 )
 from optimum.intel.openvino.utils import _HEAD_TO_AUTOMODELS
 from optimum.intel.utils.import_utils import is_openvino_tokenizers_available

From a42de51b85b851e5a93f8a5bbe3a2837b2043f00 Mon Sep 17 00:00:00 2001
From: Liubov Talamanova
Date: Thu, 18 Apr 2024 12:26:18 +0100
Subject: [PATCH 8/8] fix tests

---
 optimum/commands/export/openvino.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/optimum/commands/export/openvino.py b/optimum/commands/export/openvino.py
index ccbc589413..40901fbf90 100644
--- a/optimum/commands/export/openvino.py
+++ b/optimum/commands/export/openvino.py
@@ -211,7 +211,12 @@ def run(self):
 
         library_name = TasksManager.infer_library_from_model(self.args.model)
 
-        if library_name == "diffusers" and ov_config and ov_config.quantization_config.get("dataset"):
+        if (
+            library_name == "diffusers"
+            and ov_config
+            and ov_config.quantization_config
+            and ov_config.quantization_config.dataset is not None
+        ):
             if not is_diffusers_available():
                 raise ValueError(DIFFUSERS_IMPORT_ERROR.format("Export of diffusers models"))
 
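
As a quick end-to-end check of the flow this series adds, something along these lines should work (a sketch: the checkpoint and dataset are the ones from the README example, and the output directory is a placeholder):

```python
import subprocess

from optimum.intel import OVStableDiffusionPipeline

# Export with hybrid quantization through the new --dataset CLI option
subprocess.run(
    "optimum-cli export openvino --model stabilityai/stable-diffusion-2-1 "
    "--weight-format int8 --dataset conceptual_captions ov_model",
    shell=True,
    check=True,
)

# Reload the exported pipeline and generate an image with the quantized UNet
pipeline = OVStableDiffusionPipeline.from_pretrained("ov_model")
image = pipeline("sailing ship in storm by Rembrandt").images[0]
image.save("ship.png")
```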