From 315ff191afed70cb65b62562887740fa3017cc99 Mon Sep 17 00:00:00 2001
From: Liubov Talamanova
Date: Wed, 20 Mar 2024 11:39:28 +0000
Subject: [PATCH 1/8] Export hybrid StableDiffusion models via optimum-cli

---
 optimum/commands/export/openvino.py          | 11 ++++++
 optimum/exporters/openvino/__main__.py       | 38 ++++++++++++++++++++
 optimum/intel/openvino/modeling_diffusion.py |  2 +-
 3 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/optimum/commands/export/openvino.py b/optimum/commands/export/openvino.py
index 6c17a333ef..4c217c56c4 100644
--- a/optimum/commands/export/openvino.py
+++ b/optimum/commands/export/openvino.py
@@ -104,6 +104,16 @@ def parse_args_openvino(parser: "ArgumentParser"):
         default=None,
         help=("The group size to use for quantization. Recommended value is 128 and -1 uses per-column quantization."),
     )
+    optional_group.add_argument(
+        "--dataset",
+        type=str,
+        default=None,
+        help=(
+            "The dataset used for data-aware compression or quantization with NNCF. "
+            "You can use one from the list ['wikitext2','c4','c4-new','ptb','ptb-new'] for LLMs "
+            "or ['conceptual_captions','laion/220k-GPT4Vision-captions-from-LIVIS','laion/filtered-wit'] for diffusion models."
+        ),
+    )
     optional_group.add_argument(
         "--disable-stateful",
         action="store_true",
@@ -195,6 +205,7 @@ def run(self):
             )
             quantization_config["sym"] = "asym" not in self.args.weight_format
             quantization_config["group_size"] = 128 if "128" in self.args.weight_format else 64
+            quantization_config["dataset"] = self.args.dataset
         ov_config = OVConfig(quantization_config=quantization_config)
 
         # TODO : add input shapes
diff --git a/optimum/exporters/openvino/__main__.py b/optimum/exporters/openvino/__main__.py
index 8b8cc09fc1..053994c077 100644
--- a/optimum/exporters/openvino/__main__.py
+++ b/optimum/exporters/openvino/__main__.py
@@ -284,6 +284,44 @@ class StoreAttr(object):
         **loading_kwargs,
     )
 
+    if (
+        library_name == "diffusers"
+        and ov_config
+        and ov_config.quantization_config
+        and "dataset" in ov_config.quantization_config
+    ):
+        import huggingface_hub
+
+        model_info = huggingface_hub.model_info(model_name_or_path, revision=revision)
+        class_name = model_info.config["diffusers"]["_class_name"]
+        if class_name == "LatentConsistencyModelPipeline":
+            from optimum.intel import OVLatentConsistencyModelPipeline
+
+            model_cls = OVLatentConsistencyModelPipeline
+        elif class_name == "StableDiffusionXLPipeline":
+            from optimum.intel import OVStableDiffusionXLPipeline
+
+            model_cls = OVStableDiffusionXLPipeline
+        elif class_name == "StableDiffusionPipeline":
+            from optimum.intel import OVStableDiffusionPipeline
+
+            model_cls = OVStableDiffusionPipeline
+        else:
+            raise NotImplementedError(f"{class_name} doesn't support quantization in hybrid mode.")
+
+        model = model_cls.from_pretrained(
+            model_id=model_name_or_path,
+            export=True,
+            quantization_config=ov_config.quantization_config,
+            cache_dir=cache_dir,
+            trust_remote_code=trust_remote_code,
+            revision=revision,
+            force_download=force_download,
+            use_auth_token=use_auth_token,
+        )
+        model.save_pretrained(output)
+        return
+
     needs_pad_token_id = task == "text-classification" and getattr(model.config, "pad_token_id", None) is None
 
     if needs_pad_token_id:
diff --git a/optimum/intel/openvino/modeling_diffusion.py b/optimum/intel/openvino/modeling_diffusion.py
index 1e562749b2..7bc7cca04c 100644
--- a/optimum/intel/openvino/modeling_diffusion.py
+++ b/optimum/intel/openvino/modeling_diffusion.py
@@ -387,7 +387,7 @@ def transform_fn(data_item):
                     self.__call__(**inputs, height=height, width=width)
                 else:
                     self.__call__(*inputs, height=height, width=width)
-                if len(calibration_data) > num_samples:
+                if len(calibration_data) >= num_samples:
                     break
 
         self.unet.request = self.unet.request.request

From c33d62ac86400b738bd9c3cb9f8faf7f69a7cd89 Mon Sep 17 00:00:00 2001
From: Liubov Talamanova
Date: Wed, 20 Mar 2024 18:49:21 +0000
Subject: [PATCH 2/8] Add doc and test

---
 README.md                              |  8 +++++-
 optimum/exporters/openvino/__main__.py | 35 +++++++++++++------------
 optimum/intel/openvino/utils.py        |  1 +
 tests/openvino/test_exporters_cli.py   | 21 ++++++++++++++++
 4 files changed, 47 insertions(+), 18 deletions(-)

diff --git a/README.md b/README.md
index 78ca130145..41537d8971 100644
--- a/README.md
+++ b/README.md
@@ -78,12 +78,18 @@ It is possible to export your model to the [OpenVINO IR](https://docs.openvino.a
 optimum-cli export openvino --model gpt2 ov_model
 ```
 
-You can also apply 8-bit weight-only quantization when exporting your model : the model linear and embedding weights will be quantized to INT8, the activations will be kept in floating point precision.
+You can also apply 8-bit weight-only quantization when exporting your model: the model linear, embedding and convolution weights will be quantized to INT8, while the activations will be kept in floating point precision.
 
 ```plain
 optimum-cli export openvino --model gpt2 --weight-format int8 ov_model
 ```
 
+Quantization in hybrid mode can be applied to a Stable Diffusion pipeline during model export. This applies hybrid post-training quantization to the UNet model and weight-only quantization to the rest of the pipeline components. In hybrid mode, weights in MatMul and Embedding layers are quantized, as are the activations of the other layers.
+
+```plain
+optimum-cli export openvino --model stabilityai/stable-diffusion-2-1 --dataset conceptual_captions --weight-format int8 ov_model
+```
+
 To apply quantization on both weights and activations, you can find more information in the [documentation](https://huggingface.co/docs/optimum/main/en/intel/optimization_ov).
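+
+The same hybrid quantization is reachable from the Python API. As a minimal sketch (the pipeline class mirrors the CLI example above, and `OVWeightQuantizationConfig` carrying the calibration dataset is assumed to be the config object the export code consumes):
+
+```python
+from optimum.intel import OVStableDiffusionPipeline, OVWeightQuantizationConfig
+
+# An 8-bit weight config that also carries a dataset selects hybrid quantization of the UNet
+quantization_config = OVWeightQuantizationConfig(bits=8, dataset="conceptual_captions")
+pipeline = OVStableDiffusionPipeline.from_pretrained(
+    "stabilityai/stable-diffusion-2-1", export=True, quantization_config=quantization_config
+)
+pipeline.save_pretrained("ov_model")
+```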
 
 #### Inference:
diff --git a/optimum/exporters/openvino/__main__.py b/optimum/exporters/openvino/__main__.py
index 053994c077..7a0afcfba1 100644
--- a/optimum/exporters/openvino/__main__.py
+++ b/optimum/exporters/openvino/__main__.py
@@ -268,27 +268,12 @@ class StoreAttr(object):
 
         GPTQQuantizer.post_init_model = post_init_model
 
-    model = TasksManager.get_model_from_task(
-        task,
-        model_name_or_path,
-        subfolder=subfolder,
-        revision=revision,
-        cache_dir=cache_dir,
-        use_auth_token=use_auth_token,
-        local_files_only=local_files_only,
-        force_download=force_download,
-        trust_remote_code=trust_remote_code,
-        framework=framework,
-        device=device,
-        library_name=library_name,
-        **loading_kwargs,
-    )
-
+    # Apply quantization in hybrid mode to Stable Diffusion before export
     if (
         library_name == "diffusers"
         and ov_config
         and ov_config.quantization_config
-        and "dataset" in ov_config.quantization_config
+        and ov_config.quantization_config.get("dataset", None)
     ):
         import huggingface_hub
 
@@ -322,6 +307,22 @@ class StoreAttr(object):
         model.save_pretrained(output)
         return
 
+    model = TasksManager.get_model_from_task(
+        task,
+        model_name_or_path,
+        subfolder=subfolder,
+        revision=revision,
+        cache_dir=cache_dir,
+        use_auth_token=use_auth_token,
+        local_files_only=local_files_only,
+        force_download=force_download,
+        trust_remote_code=trust_remote_code,
+        framework=framework,
+        device=device,
+        library_name=library_name,
+        **loading_kwargs,
+    )
+
     needs_pad_token_id = task == "text-classification" and getattr(model.config, "pad_token_id", None) is None
 
     if needs_pad_token_id:
diff --git a/optimum/intel/openvino/utils.py b/optimum/intel/openvino/utils.py
index a0439d2129..4d1479f733 100644
--- a/optimum/intel/openvino/utils.py
+++ b/optimum/intel/openvino/utils.py
@@ -96,6 +96,7 @@
     "stable-diffusion": "OVStableDiffusionPipeline",
     "stable-diffusion-xl": "OVStableDiffusionXLPipeline",
     "pix2struct": "OVModelForPix2Struct",
+    "latent-consistency": "OVLatentConsistencyModelPipeline",
 }
 
 
diff --git a/tests/openvino/test_exporters_cli.py b/tests/openvino/test_exporters_cli.py
index 46c6e3c69a..cafad1fb93 100644
--- a/tests/openvino/test_exporters_cli.py
+++ b/tests/openvino/test_exporters_cli.py
@@ -37,6 +37,7 @@
     OVModelForTokenClassification,
     OVStableDiffusionPipeline,
     OVStableDiffusionXLPipeline,
+    OVLatentConsistencyModelPipeline,
 )
 from optimum.intel.openvino.utils import _HEAD_TO_AUTOMODELS
 from optimum.intel.utils.import_utils import is_openvino_tokenizers_available
@@ -77,6 +78,12 @@ class OVCLIExportTestCase(unittest.TestCase):
         "stable-diffusion-xl": 0,  # not supported
     }
 
+    SUPPORTED_SD_HYBRID_ARCHITECTURES = (
+        ("stable-diffusion", 72, 195),
+        ("stable-diffusion-xl", 84, 331),
+        ("latent-consistency", 50, 135),
+    )
+
     SUPPORTED_4BIT_ARCHITECTURES = (("text-generation-with-past", "opt125m"),)
 
     SUPPORTED_4BIT_OPTIONS = ["int4_sym_g128", "int4_asym_g128", "int4_sym_g64", "int4_asym_g64"]
@@ -176,6 +183,20 @@ def test_exporters_cli_int8(self, task: str, model_type: str):
             _, num_int8, _ = get_num_quantized_nodes(model)
             self.assertEqual(expected_int8[i], num_int8)
 
+    @parameterized.expand(SUPPORTED_SD_HYBRID_ARCHITECTURES)
+    def test_exporters_cli_hybrid_quantization(self, model_type: str, exp_num_fq: int, exp_num_int8: int):
+        with TemporaryDirectory() as tmpdir:
+            subprocess.run(
+                f"optimum-cli export openvino --model {MODEL_NAMES[model_type]} "
+                f"--task {model_type} --dataset laion/filtered-wit --weight-format int8 {tmpdir}",
+                shell=True,
+                check=True,
+            )
+            model = eval(_HEAD_TO_AUTOMODELS[model_type]).from_pretrained(tmpdir)
+            num_fq, num_int8, _ = get_num_quantized_nodes(model.unet)
+            self.assertEqual(exp_num_int8, num_int8)
+            self.assertEqual(exp_num_fq, num_fq)
+
     @parameterized.expand(TEST_4BIT_CONFIGURATONS)
     def test_exporters_cli_int4(self, task: str, model_type: str, option: str):
         with TemporaryDirectory() as tmpdir:

From 768364a1abfad42c7862af5268d14f0c96bf5899 Mon Sep 17 00:00:00 2001
From: Liubov Talamanova
Date: Thu, 21 Mar 2024 13:24:53 +0000
Subject: [PATCH 3/8] Remove huggingface_hub

---
 optimum/exporters/openvino/__main__.py | 43 ++++++++++++--------------
 1 file changed, 20 insertions(+), 23 deletions(-)

diff --git a/optimum/exporters/openvino/__main__.py b/optimum/exporters/openvino/__main__.py
index 7a0afcfba1..c4bd0419aa 100644
--- a/optimum/exporters/openvino/__main__.py
+++ b/optimum/exporters/openvino/__main__.py
@@ -268,6 +268,22 @@ class StoreAttr(object):
 
         GPTQQuantizer.post_init_model = post_init_model
 
+    model = TasksManager.get_model_from_task(
+        task,
+        model_name_or_path,
+        subfolder=subfolder,
+        revision=revision,
+        cache_dir=cache_dir,
+        use_auth_token=use_auth_token,
+        local_files_only=local_files_only,
+        force_download=force_download,
+        trust_remote_code=trust_remote_code,
+        framework=framework,
+        device=device,
+        library_name=library_name,
+        **loading_kwargs,
+    )
+
     # Apply quantization in hybrid mode to Stable Diffusion before export
     if (
         library_name == "diffusers"
@@ -275,19 +291,16 @@ class StoreAttr(object):
         and ov_config.quantization_config
         and ov_config.quantization_config.get("dataset", None)
     ):
-        import huggingface_hub
-
-        model_info = huggingface_hub.model_info(model_name_or_path, revision=revision)
-        class_name = model_info.config["diffusers"]["_class_name"]
-        if class_name == "LatentConsistencyModelPipeline":
+        class_name = model.__class__.__name__
+        if "LatentConsistencyModelPipeline" in class_name:
             from optimum.intel import OVLatentConsistencyModelPipeline
 
             model_cls = OVLatentConsistencyModelPipeline
-        elif class_name == "StableDiffusionXLPipeline":
+        elif "StableDiffusionXLPipeline" in class_name:
             from optimum.intel import OVStableDiffusionXLPipeline
 
             model_cls = OVStableDiffusionXLPipeline
-        elif class_name == "StableDiffusionPipeline":
+        elif "StableDiffusionPipeline" in class_name:
             from optimum.intel import OVStableDiffusionPipeline
 
             model_cls = OVStableDiffusionPipeline
@@ -307,22 +320,6 @@ class StoreAttr(object):
         model.save_pretrained(output)
         return
 
-    model = TasksManager.get_model_from_task(
-        task,
-        model_name_or_path,
-        subfolder=subfolder,
-        revision=revision,
-        cache_dir=cache_dir,
-        use_auth_token=use_auth_token,
-        local_files_only=local_files_only,
-        force_download=force_download,
-        trust_remote_code=trust_remote_code,
-        framework=framework,
-        device=device,
-        library_name=library_name,
-        **loading_kwargs,
-    )
-
     needs_pad_token_id = task == "text-classification" and getattr(model.config, "pad_token_id", None) is None
 
     if needs_pad_token_id:

From 2f2ce9b9aa3df46407999510909ef8842f390774 Mon Sep 17 00:00:00 2001
From: Liubov Talamanova
Date: Thu, 4 Apr 2024 13:01:54 +0100
Subject: [PATCH 4/8] remove quantization from main_export

---
 optimum/commands/export/openvino.py    | 20 ++++++-
 optimum/exporters/openvino/__main__.py | 77 +++++++++++++------------
 2 files changed, 56 insertions(+), 41 deletions(-)

diff --git a/optimum/commands/export/openvino.py b/optimum/commands/export/openvino.py
index 4c217c56c4..819162e702 100644
--- a/optimum/commands/export/openvino.py
+++ b/optimum/commands/export/openvino.py
@@ -161,7 +161,7 @@ def parse_args(parser: "ArgumentParser"):
         return parse_args_openvino(parser)
 
     def run(self):
-        from ...exporters.openvino.__main__ import main_export
+        from ...exporters.openvino.__main__ import main_export, get_relevant_task, export_optimized_diffusion_model
         from ...intel.openvino.configuration import _DEFAULT_4BIT_CONFIGS, OVConfig
 
         if self.args.fp16:
@@ -208,11 +208,22 @@ def run(self):
             quantization_config["dataset"] = self.args.dataset
         ov_config = OVConfig(quantization_config=quantization_config)
 
+        library_name = TasksManager.infer_library_from_model(self.args.model)
+        task = get_relevant_task(self.args.task, self.args.model)
+        saved_dir = self.args.output
+
+        if library_name == "diffusers" and ov_config and ov_config.quantization_config.get("dataset"):
+            import tempfile
+            from copy import deepcopy
+            saved_dir = tempfile.mkdtemp()
+            quantization_config = deepcopy(ov_config.quantization_config)
+            ov_config.quantization_config = {}
+
         # TODO : add input shapes
         main_export(
             model_name_or_path=self.args.model,
-            output=self.args.output,
-            task=self.args.task,
+            output=saved_dir,
+            task=task,
             framework=self.args.framework,
             cache_dir=self.args.cache_dir,
             trust_remote_code=self.args.trust_remote_code,
@@ -223,3 +234,6 @@ def run(self):
             library_name=self.args.library
             # **input_shapes,
         )
+
+        if saved_dir != self.args.output:
+            export_optimized_diffusion_model(saved_dir, self.args.output, task, quantization_config)
diff --git a/optimum/exporters/openvino/__main__.py b/optimum/exporters/openvino/__main__.py
index c4bd0419aa..c56fbd3688 100644
--- a/optimum/exporters/openvino/__main__.py
+++ b/optimum/exporters/openvino/__main__.py
@@ -77,7 +77,7 @@ def main_export(
         model_name_or_path (`str`):
             Model ID on huggingface.co or path on disk to the model repository to export.
         output (`Union[str, Path]`):
-            Path indicating the directory where to store the generated ONNX model.
+            Path indicating the directory where to store the generated OpenVINO model.
 
         > Optional parameters
 
@@ -161,7 +161,7 @@ def main_export(
         ov_config = OVConfig(quantization_config=q_config)
 
     original_task = task
-    task = TasksManager.map_from_synonym(task)
+    task = get_relevant_task(task, model_name_or_path)
     framework = TasksManager.determine_framework(model_name_or_path, subfolder=subfolder, framework=framework)
     library_name_is_not_provided = library_name is None
     library_name = TasksManager.infer_library_from_model(
@@ -284,42 +284,6 @@ class StoreAttr(object):
         **loading_kwargs,
     )
 
-    # Apply quantization in hybrid mode to Stable Diffusion before export
-    if (
-        library_name == "diffusers"
-        and ov_config
-        and ov_config.quantization_config
-        and ov_config.quantization_config.get("dataset", None)
-    ):
-        class_name = model.__class__.__name__
-        if "LatentConsistencyModelPipeline" in class_name:
-            from optimum.intel import OVLatentConsistencyModelPipeline
-
-            model_cls = OVLatentConsistencyModelPipeline
-        elif "StableDiffusionXLPipeline" in class_name:
-            from optimum.intel import OVStableDiffusionXLPipeline
-
-            model_cls = OVStableDiffusionXLPipeline
-        elif "StableDiffusionPipeline" in class_name:
-            from optimum.intel import OVStableDiffusionPipeline
-
-            model_cls = OVStableDiffusionPipeline
-        else:
-            raise NotImplementedError(f"{class_name} doesn't support quantization in hybrid mode.")
-
-        model = model_cls.from_pretrained(
-            model_id=model_name_or_path,
-            export=True,
-            quantization_config=ov_config.quantization_config,
-            cache_dir=cache_dir,
-            trust_remote_code=trust_remote_code,
-            revision=revision,
-            force_download=force_download,
-            use_auth_token=use_auth_token,
-        )
-        model.save_pretrained(output)
-        return
-
     needs_pad_token_id = task == "text-classification" and getattr(model.config, "pad_token_id", None) is None
 
     if needs_pad_token_id:
@@ -412,3 +376,40 @@ class StoreAttr(object):
     if do_gptq_patching:
         torch.cuda.is_available = orig_cuda_check
         GPTQQuantizer.post_init_model = orig_post_init_model
+
+
+def get_relevant_task(task, model_name_or_path):
+    relevant_task = TasksManager.map_from_synonym(task)
+    if relevant_task == "auto":
+        try:
+            relevant_task = TasksManager.infer_task_from_model(model_name_or_path)
+        except KeyError as e:
+            raise KeyError(
+                f"The task could not be automatically inferred. Please provide the argument --task with the relevant task from {', '.join(TasksManager.get_all_tasks())}. Detailed error: {e}"
+            )
+        except RequestsConnectionError as e:
+            raise RequestsConnectionError(
+                f"The task could not be automatically inferred as this is available only for models hosted on the Hugging Face Hub. Please provide the argument --task with the relevant task from {', '.join(TasksManager.get_all_tasks())}. Detailed error: {e}"
+            )
+    return relevant_task
+
+
+def export_optimized_diffusion_model(model_name_or_path, output, task, quantization_config):
+    task = get_relevant_task(task, model_name_or_path)
+    if task == "latent-consistency":
+        from optimum.intel import OVLatentConsistencyModelPipeline
+
+        model_cls = OVLatentConsistencyModelPipeline
+    elif task == "stable-diffusion-xl":
+        from optimum.intel import OVStableDiffusionXLPipeline
+
+        model_cls = OVStableDiffusionXLPipeline
+    elif task == "stable-diffusion":
+        from optimum.intel import OVStableDiffusionPipeline
+
+        model_cls = OVStableDiffusionPipeline
+    else:
+        raise NotImplementedError(f"Quantization in hybrid mode isn't supported for {task}.")
+
+    model = model_cls.from_pretrained(model_id=model_name_or_path, quantization_config=quantization_config)
+    model.save_pretrained(output)

From 30c4a96a3dba488a7324268bbcd433f29a52d2f3 Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Wed, 10 Apr 2024 15:38:53 +0200
Subject: [PATCH 5/8] remove unused function

---
 optimum/commands/export/openvino.py    |  2 +-
 optimum/exporters/openvino/__main__.py | 39 +-------------------------
 tests/openvino/test_exporters_cli.py   |  3 +--
 3 files changed, 3 insertions(+), 41 deletions(-)

diff --git a/optimum/commands/export/openvino.py b/optimum/commands/export/openvino.py
index 819162e702..d4042b02e5 100644
--- a/optimum/commands/export/openvino.py
+++ b/optimum/commands/export/openvino.py
@@ -161,7 +161,7 @@ def parse_args(parser: "ArgumentParser"):
         return parse_args_openvino(parser)
 
     def run(self):
-        from ...exporters.openvino.__main__ import main_export, get_relevant_task, export_optimized_diffusion_model
+        from ...exporters.openvino.__main__ import main_export
         from ...intel.openvino.configuration import _DEFAULT_4BIT_CONFIGS, OVConfig
 
         if self.args.fp16:
diff --git a/optimum/exporters/openvino/__main__.py b/optimum/exporters/openvino/__main__.py
index c56fbd3688..5f74c1de8b 100644
--- a/optimum/exporters/openvino/__main__.py
+++ b/optimum/exporters/openvino/__main__.py
@@ -161,7 +161,7 @@ def main_export(
         ov_config = OVConfig(quantization_config=q_config)
 
     original_task = task
-    task = get_relevant_task(task, model_name_or_path)
+    task = TasksManager.map_from_synonym(task)
     framework = TasksManager.determine_framework(model_name_or_path, subfolder=subfolder, framework=framework)
     library_name_is_not_provided = library_name is None
     library_name = TasksManager.infer_library_from_model(
@@ -376,40 +376,3 @@ class StoreAttr(object):
     if do_gptq_patching:
         torch.cuda.is_available = orig_cuda_check
         GPTQQuantizer.post_init_model = orig_post_init_model
-
-
-def get_relevant_task(task, model_name_or_path):
-    relevant_task = TasksManager.map_from_synonym(task)
-    if relevant_task == "auto":
-        try:
-            relevant_task = TasksManager.infer_task_from_model(model_name_or_path)
-        except KeyError as e:
-            raise KeyError(
-                f"The task could not be automatically inferred. Please provide the argument --task with the relevant task from {', '.join(TasksManager.get_all_tasks())}. Detailed error: {e}"
-            )
-        except RequestsConnectionError as e:
-            raise RequestsConnectionError(
-                f"The task could not be automatically inferred as this is available only for models hosted on the Hugging Face Hub. Please provide the argument --task with the relevant task from {', '.join(TasksManager.get_all_tasks())}. Detailed error: {e}"
-            )
-    return relevant_task
-
-
-def export_optimized_diffusion_model(model_name_or_path, output, task, quantization_config):
-    task = get_relevant_task(task, model_name_or_path)
-    if task == "latent-consistency":
-        from optimum.intel import OVLatentConsistencyModelPipeline
-
-        model_cls = OVLatentConsistencyModelPipeline
-    elif task == "stable-diffusion-xl":
-        from optimum.intel import OVStableDiffusionXLPipeline
-
-        model_cls = OVStableDiffusionXLPipeline
-    elif task == "stable-diffusion":
-        from optimum.intel import OVStableDiffusionPipeline
-
-        model_cls = OVStableDiffusionPipeline
-    else:
-        raise NotImplementedError(f"Quantization in hybrid mode isn't supported for {task}.")
-
-    model = model_cls.from_pretrained(model_id=model_name_or_path, quantization_config=quantization_config)
-    model.save_pretrained(output)
diff --git a/tests/openvino/test_exporters_cli.py b/tests/openvino/test_exporters_cli.py
index cafad1fb93..2ca9bdef0f 100644
--- a/tests/openvino/test_exporters_cli.py
+++ b/tests/openvino/test_exporters_cli.py
@@ -187,8 +187,7 @@ def test_exporters_cli_int8(self, task: str, model_type: str):
     def test_exporters_cli_hybrid_quantization(self, model_type: str, exp_num_fq: int, exp_num_int8: int):
         with TemporaryDirectory() as tmpdir:
             subprocess.run(
-                f"optimum-cli export openvino --model {MODEL_NAMES[model_type]} "
-                f"--task {model_type} --dataset laion/filtered-wit --weight-format int8 {tmpdir}",
+                f"optimum-cli export openvino --model {MODEL_NAMES[model_type]} --dataset laion/filtered-wit --weight-format int8 {tmpdir}",
                 shell=True,
                 check=True,
             )

From 13e44b033ff4679070a47048cbc25259e31e3b57 Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Wed, 10 Apr 2024 15:35:29 +0200
Subject: [PATCH 6/8] Infer task by loading the diffusers config

---
 optimum/commands/export/openvino.py | 71 +++++++++++++++++++----------
 1 file changed, 46 insertions(+), 25 deletions(-)

diff --git a/optimum/commands/export/openvino.py b/optimum/commands/export/openvino.py
index d4042b02e5..6ce9e3731e 100644
--- a/optimum/commands/export/openvino.py
+++ b/optimum/commands/export/openvino.py
@@ -20,6 +20,7 @@
 
 from ...exporters import TasksManager
 from ..base import BaseOptimumCLICommand, CommandInfo
+from ...intel.utils.import_utils import DIFFUSERS_IMPORT_ERROR, is_diffusers_available
 
 
 logger = logging.getLogger(__name__)
@@ -209,31 +210,51 @@ def run(self):
             ov_config = OVConfig(quantization_config=quantization_config)
 
         library_name = TasksManager.infer_library_from_model(self.args.model)
-        task = get_relevant_task(self.args.task, self.args.model)
-        saved_dir = self.args.output
 
         if library_name == "diffusers" and ov_config and ov_config.quantization_config.get("dataset"):
-            import tempfile
-            from copy import deepcopy
-            saved_dir = tempfile.mkdtemp()
-            quantization_config = deepcopy(ov_config.quantization_config)
-            ov_config.quantization_config = {}
-
-        # TODO : add input shapes
-        main_export(
-            model_name_or_path=self.args.model,
-            output=saved_dir,
-            task=task,
-            framework=self.args.framework,
-            cache_dir=self.args.cache_dir,
-            trust_remote_code=self.args.trust_remote_code,
-            pad_token_id=self.args.pad_token_id,
-            ov_config=ov_config,
-            stateful=not self.args.disable_stateful,
-            convert_tokenizer=self.args.convert_tokenizer,
-            library_name=self.args.library
-            # **input_shapes,
-        )
-
-        if saved_dir != self.args.output:
-            export_optimized_diffusion_model(saved_dir, self.args.output, task, quantization_config)
+
+            if not is_diffusers_available():
+                raise ValueError(DIFFUSERS_IMPORT_ERROR.format("Export of diffusers models"))
+
+            from diffusers import DiffusionPipeline
+
+            diffusers_config = DiffusionPipeline.load_config(self.args.model)
+            class_name = diffusers_config.get("_class_name", None)
+
+            if class_name == "LatentConsistencyModelPipeline":
+
+                from optimum.intel import OVLatentConsistencyModelPipeline
+
+                model_cls = OVLatentConsistencyModelPipeline
+
+            elif class_name == "StableDiffusionXLPipeline":
+
+                from optimum.intel import OVStableDiffusionXLPipeline
+
+                model_cls = OVStableDiffusionXLPipeline
+            elif class_name == "StableDiffusionPipeline":
+                from optimum.intel import OVStableDiffusionPipeline
+
+                model_cls = OVStableDiffusionPipeline
+            else:
+                raise NotImplementedError(f"Quantization in hybrid mode isn't supported for class {class_name}.")
+
+            model = model_cls.from_pretrained(self.args.model, export=True, quantization_config=ov_config.quantization_config)
+            model.save_pretrained(self.args.output)
+
+        else:
+            # TODO : add input shapes
+            main_export(
+                model_name_or_path=self.args.model,
+                output=self.args.output,
+                task=self.args.task,
+                framework=self.args.framework,
+                cache_dir=self.args.cache_dir,
+                trust_remote_code=self.args.trust_remote_code,
+                pad_token_id=self.args.pad_token_id,
+                ov_config=ov_config,
+                stateful=not self.args.disable_stateful,
+                convert_tokenizer=self.args.convert_tokenizer,
+                library_name=library_name,
+                # **input_shapes,
+            )

From 6f283282bae849d44fa2e7648ad2d165058c432a Mon Sep 17 00:00:00 2001
From: Liubov Talamanova
Date: Wed, 17 Apr 2024 12:13:00 +0100
Subject: [PATCH 7/8] Fix style

---
 optimum/commands/export/openvino.py  | 9 ++++-----
 tests/openvino/test_exporters_cli.py | 2 +-
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/optimum/commands/export/openvino.py b/optimum/commands/export/openvino.py
index 6ce9e3731e..ccbc589413 100644
--- a/optimum/commands/export/openvino.py
+++ b/optimum/commands/export/openvino.py
@@ -19,8 +19,8 @@
 from typing import TYPE_CHECKING, Optional
 
 from ...exporters import TasksManager
-from ..base import BaseOptimumCLICommand, CommandInfo
-from ...intel.utils.import_utils import DIFFUSERS_IMPORT_ERROR, is_diffusers_available
+from ...intel.utils.import_utils import DIFFUSERS_IMPORT_ERROR, is_diffusers_available
+from ..base import BaseOptimumCLICommand, CommandInfo
 
 
 logger = logging.getLogger(__name__)
@@ -212,7 +212,6 @@ def run(self):
         library_name = TasksManager.infer_library_from_model(self.args.model)
 
         if library_name == "diffusers" and ov_config and ov_config.quantization_config.get("dataset"):
-
             if not is_diffusers_available():
                 raise ValueError(DIFFUSERS_IMPORT_ERROR.format("Export of diffusers models"))
 
@@ -222,13 +221,11 @@ def run(self):
             class_name = diffusers_config.get("_class_name", None)
 
             if class_name == "LatentConsistencyModelPipeline":
-
                 from optimum.intel import OVLatentConsistencyModelPipeline
 
                 model_cls = OVLatentConsistencyModelPipeline
 
             elif class_name == "StableDiffusionXLPipeline":
-
                 from optimum.intel import OVStableDiffusionXLPipeline
 
                 model_cls = OVStableDiffusionXLPipeline
@@ -239,7 +236,9 @@ def run(self):
             else:
                 raise NotImplementedError(f"Quantization in hybrid mode isn't supported for class {class_name}.")
 
-            model = model_cls.from_pretrained(self.args.model, export=True, quantization_config=ov_config.quantization_config)
+            model = model_cls.from_pretrained(
+                self.args.model, export=True, quantization_config=ov_config.quantization_config
+            )
             model.save_pretrained(self.args.output)
 
         else:
diff --git a/tests/openvino/test_exporters_cli.py b/tests/openvino/test_exporters_cli.py
index 2ca9bdef0f..7d618c530e 100644
--- a/tests/openvino/test_exporters_cli.py
+++ b/tests/openvino/test_exporters_cli.py
@@ -26,6 +26,7 @@
 
 from optimum.exporters.openvino.__main__ import main_export
 from optimum.intel import (  # noqa
+    OVLatentConsistencyModelPipeline,
     OVModelForAudioClassification,
     OVModelForCausalLM,
     OVModelForFeatureExtraction,
@@ -37,7 +38,6 @@
     OVModelForTokenClassification,
     OVStableDiffusionPipeline,
     OVStableDiffusionXLPipeline,
-    OVLatentConsistencyModelPipeline,
 )
 from optimum.intel.openvino.utils import _HEAD_TO_AUTOMODELS
 from optimum.intel.utils.import_utils import is_openvino_tokenizers_available

From a42de51b85b851e5a93f8a5bbe3a2837b2043f00 Mon Sep 17 00:00:00 2001
From: Liubov Talamanova
Date: Thu, 18 Apr 2024 12:26:18 +0100
Subject: [PATCH 8/8] fix tests

---
 optimum/commands/export/openvino.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/optimum/commands/export/openvino.py b/optimum/commands/export/openvino.py
index ccbc589413..40901fbf90 100644
--- a/optimum/commands/export/openvino.py
+++ b/optimum/commands/export/openvino.py
@@ -211,7 +211,12 @@ def run(self):
 
         library_name = TasksManager.infer_library_from_model(self.args.model)
 
-        if library_name == "diffusers" and ov_config and ov_config.quantization_config.get("dataset"):
+        if (
+            library_name == "diffusers"
+            and ov_config
+            and ov_config.quantization_config
+            and ov_config.quantization_config.dataset is not None
+        ):
             if not is_diffusers_available():
                 raise ValueError(DIFFUSERS_IMPORT_ERROR.format("Export of diffusers models"))
 
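
As a quick end-to-end check of the flow this series adds, something along these lines should work (a sketch: the checkpoint and dataset are the ones from the README example, and the output directory is a placeholder):

```python
import subprocess

from optimum.intel import OVStableDiffusionPipeline

# Export with hybrid quantization through the new --dataset CLI option
subprocess.run(
    "optimum-cli export openvino --model stabilityai/stable-diffusion-2-1 "
    "--weight-format int8 --dataset conceptual_captions ov_model",
    shell=True,
    check=True,
)

# Reload the exported pipeline and generate an image with the quantized UNet
pipeline = OVStableDiffusionPipeline.from_pretrained("ov_model")
image = pipeline("sailing ship in storm by Rembrandt").images[0]
image.save("ship.png")
```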