update setting activation scale for diffusers (#1110)

eaidova · web-flow · commit 726191fe0c63 · 2025-01-16T10:45:40.000+01:00
* update setting activation scale for diffusers

* fix style

* apply review comments
diff --git a/optimum/exporters/openvino/convert.py b/optimum/exporters/openvino/convert.py
@@ -46,7 +46,6 @@
     _torch_version,
     _transformers_version,
     compare_versions,
-    is_diffusers_version,
     is_openvino_tokenizers_version,
     is_openvino_version,
     is_tokenizers_version,
@@ -104,10 +103,10 @@ def _set_runtime_options(
 ):
     for model_name in models_and_export_configs.keys():
         _, sub_export_config = models_and_export_configs[model_name]
-        sub_export_config.runtime_options = {}
+        if not hasattr(sub_export_config, "runtime_options"):
+            sub_export_config.runtime_options = {}
         if (
-            "diffusers" in library_name
-            or "text-generation" in task
+            "text-generation" in task
             or ("image-text-to-text" in task and model_name == "language_model")
             or getattr(sub_export_config, "stateful", False)
         ):
@@ -1014,45 +1013,29 @@ def _get_submodels_and_export_configs(
 def get_diffusion_models_for_export_ext(
     pipeline: "DiffusionPipeline", int_dtype: str = "int64", float_dtype: str = "fp32", exporter: str = "openvino"
 ):
-    if is_diffusers_version(">=", "0.29.0"):
-        from diffusers import StableDiffusion3Img2ImgPipeline, StableDiffusion3Pipeline
-
-        sd3_pipes = [StableDiffusion3Pipeline, StableDiffusion3Img2ImgPipeline]
-        if is_diffusers_version(">=", "0.30.0"):
-            from diffusers import StableDiffusion3InpaintPipeline
-
-            sd3_pipes.append(StableDiffusion3InpaintPipeline)
-
-        is_sd3 = isinstance(pipeline, tuple(sd3_pipes))
-    else:
-        is_sd3 = False
-
-    if is_diffusers_version(">=", "0.30.0"):
-        from diffusers import FluxPipeline
-
-        flux_pipes = [FluxPipeline]
-
-        if is_diffusers_version(">=", "0.31.0"):
-            from diffusers import FluxImg2ImgPipeline, FluxInpaintPipeline
-
-            flux_pipes.extend([FluxPipeline, FluxImg2ImgPipeline, FluxInpaintPipeline])
-
-        if is_diffusers_version(">=", "0.32.0"):
-            from diffusers import FluxFillPipeline
-
-            flux_pipes.append(FluxFillPipeline)
-
-        is_flux = isinstance(pipeline, tuple(flux_pipes))
-    else:
-        is_flux = False
-
-    if not is_sd3 and not is_flux:
-        return None, get_diffusion_models_for_export(pipeline, int_dtype, float_dtype, exporter)
-    if is_sd3:
+    is_sdxl = pipeline.__class__.__name__.startswith("StableDiffusionXL")
+    is_sd3 = pipeline.__class__.__name__.startswith("StableDiffusion3")
+    is_flux = pipeline.__class__.__name__.startswith("Flux")
+    is_sd = pipeline.__class__.__name__.startswith("StableDiffusion") and not is_sd3
+    is_lcm = pipeline.__class__.__name__.startswith("LatentConsistencyModel")
+
+    if is_sd or is_sdxl or is_lcm:
+        models_for_export = get_diffusion_models_for_export(pipeline, int_dtype, float_dtype, exporter)
+        if is_sdxl and pipeline.vae.config.force_upcast:
+            models_for_export["vae_encoder"][1].runtime_options = {"ACTIVATIONS_SCALE_FACTOR": "128.0"}
+            models_for_export["vae_decoder"][1].runtime_options = {"ACTIVATIONS_SCALE_FACTOR": "128.0"}
+
+        # only SD 2.1 has an overflow issue; it uses a different prediction_type than the other models
+        if is_sd and pipeline.scheduler.config.prediction_type == "v_prediction":
+            models_for_export["vae_encoder"][1].runtime_options = {"ACTIVATIONS_SCALE_FACTOR": "8.0"}
+            models_for_export["vae_decoder"][1].runtime_options = {"ACTIVATIONS_SCALE_FACTOR": "8.0"}
+
+    elif is_sd3:
         models_for_export = get_sd3_models_for_export(pipeline, exporter, int_dtype, float_dtype)
-    else:
+    elif is_flux:
         models_for_export = get_flux_models_for_export(pipeline, exporter, int_dtype, float_dtype)
-
+    else:
+        raise ValueError(f"Unsupported pipeline type `{pipeline.__class__.__name__}` provided")
     return None, models_for_export
 
 
@@ -1150,6 +1133,7 @@ def get_sd3_models_for_export(pipeline, exporter, int_dtype, float_dtype):
             int_dtype=int_dtype,
             float_dtype=float_dtype,
         )
+        export_config.runtime_options = {"ACTIVATIONS_SCALE_FACTOR": "8.0"}
         models_for_export["text_encoder_3"] = (text_encoder_3, export_config)
 
     return models_for_export
@@ -1187,6 +1171,7 @@ def get_flux_models_for_export(pipeline, exporter, int_dtype, float_dtype):
     transformer_export_config = export_config_constructor(
         pipeline.transformer.config, int_dtype=int_dtype, float_dtype=float_dtype
     )
+    transformer_export_config.runtime_options = {"ACTIVATIONS_SCALE_FACTOR": "8.0"}
     models_for_export["transformer"] = (transformer, transformer_export_config)
 
     # VAE Encoder https://github.com/huggingface/diffusers/blob/v0.11.1/src/diffusers/models/vae.py#L565
@@ -1202,6 +1187,7 @@ def get_flux_models_for_export(pipeline, exporter, int_dtype, float_dtype):
     vae_encoder_export_config = vae_config_constructor(
         vae_encoder.config, int_dtype=int_dtype, float_dtype=float_dtype
     )
+    vae_encoder_export_config.runtime_options = {"ACTIVATIONS_SCALE_FACTOR": "8.0"}
     models_for_export["vae_encoder"] = (vae_encoder, vae_encoder_export_config)
 
     # VAE Decoder https://github.com/huggingface/diffusers/blob/v0.11.1/src/diffusers/models/vae.py#L600
@@ -1217,6 +1203,7 @@ def get_flux_models_for_export(pipeline, exporter, int_dtype, float_dtype):
     vae_decoder_export_config = vae_config_constructor(
         vae_decoder.config, int_dtype=int_dtype, float_dtype=float_dtype
     )
+    vae_decoder_export_config.runtime_options = {"ACTIVATIONS_SCALE_FACTOR": "8.0"}
     models_for_export["vae_decoder"] = (vae_decoder, vae_decoder_export_config)
 
     text_encoder_2 = getattr(pipeline, "text_encoder_2", None)
@@ -1233,6 +1220,7 @@ def get_flux_models_for_export(pipeline, exporter, int_dtype, float_dtype):
             int_dtype=int_dtype,
             float_dtype=float_dtype,
         )
+        export_config.runtime_options = {"ACTIVATIONS_SCALE_FACTOR": "8.0"}
         models_for_export["text_encoder_2"] = (text_encoder_2, export_config)
 
     return models_for_export
diff --git a/optimum/intel/openvino/modeling_diffusion.py b/optimum/intel/openvino/modeling_diffusion.py
@@ -63,7 +63,7 @@
 )
 
 from ...exporters.openvino import main_export
-from ..utils.import_utils import is_diffusers_version
+from ..utils.import_utils import is_diffusers_version, is_openvino_version
 from .configuration import OVConfig, OVQuantizationMethod, OVWeightQuantizationConfig
 from .loaders import OVTextualInversionLoaderMixin
 from .modeling_base import OVBaseModel
@@ -73,6 +73,7 @@
     OV_XML_FILE_NAME,
     TemporaryDirectory,
     _print_compiled_model_properties,
+    check_scale_available,
     model_has_dynamic_inputs,
     np_to_pt_generators,
 )
@@ -484,8 +485,15 @@ def _from_pretrained(
             ov_config = kwargs.get("ov_config", {})
             device = kwargs.get("device", "CPU")
             vae_ov_conifg = {**ov_config}
-            if "GPU" in device.upper() and "INFERENCE_PRECISION_HINT" not in vae_ov_conifg:
-                vae_ov_conifg["INFERENCE_PRECISION_HINT"] = "f32"
+            if (
+                "GPU" in device.upper()
+                and "INFERENCE_PRECISION_HINT" not in vae_ov_conifg
+                and is_openvino_version("<=", "2025.0")
+            ):
+                vae_model_path = models["vae_decoder"]
+                required_upcast = check_scale_available(vae_model_path)
+                if required_upcast:
+                    vae_ov_conifg["INFERENCE_PRECISION_HINT"] = "f32"
             for name, path in models.items():
                 if name in kwargs:
                     models[name] = kwargs.pop(name)
@@ -1202,7 +1210,12 @@ def forward(
         return ModelOutput(**model_outputs)
 
     def _compile(self):
-        if "GPU" in self._device and "INFERENCE_PRECISION_HINT" not in self.ov_config:
+        if (
+            "GPU" in self._device
+            and "INFERENCE_PRECISION_HINT" not in self.ov_config
+            and is_openvino_version("<", "2025.0")
+            and check_scale_available(self.model)
+        ):
             self.ov_config.update({"INFERENCE_PRECISION_HINT": "f32"})
         super()._compile()
 
@@ -1241,7 +1254,12 @@ def forward(
         return ModelOutput(**model_outputs)
 
     def _compile(self):
-        if "GPU" in self._device and "INFERENCE_PRECISION_HINT" not in self.ov_config:
+        if (
+            "GPU" in self._device
+            and "INFERENCE_PRECISION_HINT" not in self.ov_config
+            and is_openvino_version("<", "2025.0")
+            and check_scale_available(self.model)
+        ):
             self.ov_config.update({"INFERENCE_PRECISION_HINT": "f32"})
         super()._compile()
 
diff --git a/optimum/intel/openvino/utils.py b/optimum/intel/openvino/utils.py
@@ -565,3 +565,21 @@ def onexc(func, path, exc):
     def cleanup(self):
         if self._finalizer.detach() or os.path.exists(self.name):
             self._rmtree(self.name, ignore_errors=self._ignore_cleanup_errors)
+
+
+def check_scale_available(model: Union[Model, str, Path]):
+    if isinstance(model, Model):
+        return model.has_rt_info(["runtime_options", "ACTIVATIONS_SCALE_FACTOR"])
+    if not Path(model).exists():
+        return False
+    import xml.etree.ElementTree as ET
+
+    tree = ET.parse(model)
+    root = tree.getroot()
+    rt_info = root.find("rt_info")
+    if rt_info is None:
+        return False
+    runtime_options = rt_info.find("runtime_options")
+    if runtime_options is None:
+        return False
+    return runtime_options.find("ACTIVATIONS_SCALE_FACTOR") is not None
diff --git a/tests/openvino/test_export.py b/tests/openvino/test_export.py
@@ -75,6 +75,13 @@ class ExportModelTest(unittest.TestCase):
         "llava": OVModelForVisualCausalLM,
     }
 
+    EXPECTED_DIFFUSERS_SCALE_FACTORS = {
+        "stable-diffusion-xl": {"vae_encoder": "128.0", "vae_decoder": "128.0"},
+        "stable-diffusion-3": {"text_encoder_3": "8.0"},
+        "flux": {"text_encoder_2": "8.0", "transformer": "8.0", "vae_encoder": "8.0", "vae_decoder": "8.0"},
+        "stable-diffusion-xl-refiner": {"vae_encoder": "128.0", "vae_decoder": "128.0"},
+    }
+
     if is_transformers_version(">=", "4.45"):
         SUPPORTED_ARCHITECTURES.update({"stable-diffusion-3": OVStableDiffusion3Pipeline, "flux": OVFluxPipeline})
 
@@ -143,32 +150,33 @@ def _openvino_export(
                     )
 
                 if library_name == "diffusers":
-                    self.assertTrue(
-                        ov_model.vae_encoder.model.has_rt_info(["runtime_options", "ACTIVATIONS_SCALE_FACTOR"])
-                    )
-                    self.assertTrue(
-                        ov_model.vae_decoder.model.has_rt_info(["runtime_options", "ACTIVATIONS_SCALE_FACTOR"])
-                    )
-                    if hasattr(ov_model, "text_encoder") and ov_model.text_encoder:
-                        self.assertTrue(
-                            ov_model.text_encoder.model.has_rt_info(["runtime_options", "ACTIVATIONS_SCALE_FACTOR"])
-                        )
-                    if hasattr(ov_model, "text_encoder_2") and ov_model.text_encoder_2:
-                        self.assertTrue(
-                            ov_model.text_encoder_2.model.has_rt_info(["runtime_options", "ACTIVATIONS_SCALE_FACTOR"])
-                        )
-                    if hasattr(ov_model, "text_encoder_3") and ov_model.text_encoder_3:
-                        self.assertTrue(
-                            ov_model.text_encoder_3.model.has_rt_info(["runtime_options", "ACTIVATIONS_SCALE_FACTOR"])
-                        )
-                    if hasattr(ov_model, "unet") and ov_model.unet:
-                        self.assertTrue(
-                            ov_model.unet.model.has_rt_info(["runtime_options", "ACTIVATIONS_SCALE_FACTOR"])
-                        )
-                    if hasattr(ov_model, "transformer") and ov_model.transformer:
-                        self.assertTrue(
-                            ov_model.transformer.model.has_rt_info(["runtime_options", "ACTIVATIONS_SCALE_FACTOR"])
-                        )
+                    expected_scale_factors = self.EXPECTED_DIFFUSERS_SCALE_FACTORS.get(model_type, {})
+                    components = [
+                        "unet",
+                        "transformer",
+                        "text_encoder",
+                        "text_encoder_2",
+                        "text_encoder_3",
+                        "vae_encoder",
+                        "vae_decoder",
+                    ]
+                    for component in components:
+                        component_model = getattr(ov_model, component, None)
+                        if component_model is None:
+                            continue
+                        component_scale = expected_scale_factors.get(component)
+                        if component_scale is not None:
+                            self.assertTrue(
+                                component_model.model.has_rt_info(["runtime_options", "ACTIVATIONS_SCALE_FACTOR"])
+                            )
+                            self.assertEqual(
+                                component_model.model.get_rt_info()["runtime_options"]["ACTIVATIONS_SCALE_FACTOR"],
+                                component_scale,
+                            )
+                        else:
+                            self.assertFalse(
+                                component_model.model.has_rt_info(["runtime_options", "ACTIVATIONS_SCALE_FACTOR"])
+                            )
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES)
     def test_export(self, model_type: str):